-rw-r--r--  .ci/scripts/common/post-upload.sh | 4
-rw-r--r--  .ci/scripts/common/pre-upload.sh | 3
-rwxr-xr-x  .ci/scripts/linux/docker.sh | 44
-rw-r--r--  .ci/scripts/linux/upload.sh | 5
-rw-r--r--  .ci/templates/build-msvc.yml | 2
-rw-r--r--  CMakeLists.txt | 10
-rw-r--r--  README.md | 1
m---------  externals/dynarmic | 0
-rw-r--r--  src/CMakeLists.txt | 6
-rw-r--r--  src/audio_core/algorithm/interpolate.cpp | 2
-rw-r--r--  src/audio_core/audio_renderer.cpp | 13
-rw-r--r--  src/audio_core/audio_renderer.h | 8
-rw-r--r--  src/audio_core/stream.cpp | 12
-rw-r--r--  src/common/CMakeLists.txt | 4
-rw-r--r--  src/common/concepts.h | 4
-rw-r--r--  src/common/div_ceil.h | 10
-rw-r--r--  src/common/memory_hook.cpp | 11
-rw-r--r--  src/common/memory_hook.h | 47
-rw-r--r--  src/common/page_table.cpp | 10
-rw-r--r--  src/common/page_table.h | 88
-rw-r--r--  src/common/swap.h | 4
-rw-r--r--  src/common/thread_worker.cpp | 58
-rw-r--r--  src/common/thread_worker.h | 30
-rw-r--r--  src/common/virtual_buffer.h | 10
-rw-r--r--  src/core/CMakeLists.txt | 5
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_32.cpp | 13
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic_64.cpp | 4
-rw-r--r--  src/core/core.cpp | 3
-rw-r--r--  src/core/crypto/key_manager.cpp | 11
-rw-r--r--  src/core/file_sys/nca_patch.cpp | 2
-rw-r--r--  src/core/file_sys/registered_cache.cpp | 3
-rw-r--r--  src/core/file_sys/registered_cache.h | 8
-rw-r--r--  src/core/hle/kernel/hle_ipc.cpp | 37
-rw-r--r--  src/core/hle/kernel/hle_ipc.h | 17
-rw-r--r--  src/core/hle/kernel/k_priority_queue.h | 8
-rw-r--r--  src/core/hle/kernel/k_scheduler_lock.h | 1
-rw-r--r--  src/core/hle/kernel/kernel.cpp | 113
-rw-r--r--  src/core/hle/kernel/kernel.h | 17
-rw-r--r--  src/core/hle/kernel/memory/address_space_info.cpp | 2
-rw-r--r--  src/core/hle/kernel/memory/memory_block.h | 14
-rw-r--r--  src/core/hle/kernel/memory/page_table.cpp | 14
-rw-r--r--  src/core/hle/kernel/server_session.cpp | 29
-rw-r--r--  src/core/hle/kernel/server_session.h | 12
-rw-r--r--  src/core/hle/kernel/service_thread.cpp | 110
-rw-r--r--  src/core/hle/kernel/service_thread.h | 28
-rw-r--r--  src/core/hle/kernel/svc.cpp | 2
-rw-r--r--  src/core/hle/kernel/svc_types.h | 4
-rw-r--r--  src/core/hle/service/am/am.cpp | 6
-rw-r--r--  src/core/hle/service/am/am.h | 2
-rw-r--r--  src/core/hle/service/apm/interface.cpp | 10
-rw-r--r--  src/core/hle/service/apm/interface.h | 1
-rw-r--r--  src/core/hle/service/audio/audout_u.cpp | 6
-rw-r--r--  src/core/hle/service/audio/audren_u.cpp | 14
-rw-r--r--  src/core/hle/service/hid/hid.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdevice.h | 9
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 9
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 9
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 20
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl.h | 11
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 14
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 11
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 31
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h | 14
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp | 9
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.cpp | 13
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_vic.h | 12
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.cpp | 8
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvmap.h | 8
-rw-r--r--  src/core/hle/service/nvdrv/interface.cpp | 92
-rw-r--r--  src/core/hle/service/nvdrv/nvdata.h | 11
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.cpp | 19
-rw-r--r--  src/core/hle/service/nvdrv/nvdrv.h | 6
-rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.cpp | 52
-rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.h | 19
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp | 34
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.h | 9
-rw-r--r--  src/core/hle/service/pcie/pcie.cpp | 2
-rw-r--r--  src/core/hle/service/service.cpp | 28
-rw-r--r--  src/core/hle/service/service.h | 16
-rw-r--r--  src/core/hle/service/sockets/blocking_worker.h | 161
-rw-r--r--  src/core/hle/service/sockets/bsd.cpp | 124
-rw-r--r--  src/core/hle/service/sockets/bsd.h | 9
-rw-r--r--  src/core/hle/service/sockets/sockets_translate.cpp | 1
-rw-r--r--  src/core/hle/service/vi/vi.cpp | 45
-rw-r--r--  src/core/loader/loader.cpp | 4
-rw-r--r--  src/core/loader/loader.h | 1
-rw-r--r--  src/core/memory.cpp | 217
-rw-r--r--  src/core/memory.h | 34
-rw-r--r--  src/core/settings.cpp | 5
-rw-r--r--  src/core/settings.h | 6
-rw-r--r--  src/input_common/gcadapter/gc_adapter.h | 6
-rw-r--r--  src/input_common/gcadapter/gc_poller.cpp | 28
-rw-r--r--  src/input_common/motion_input.cpp | 2
-rw-r--r--  src/input_common/mouse/mouse_input.h | 2
-rw-r--r--  src/input_common/mouse/mouse_poller.cpp | 25
-rw-r--r--  src/input_common/sdl/sdl_impl.cpp | 73
-rw-r--r--  src/input_common/udp/client.cpp | 5
-rw-r--r--  src/input_common/udp/udp.cpp | 8
-rw-r--r--  src/tests/CMakeLists.txt | 2
-rw-r--r--  src/tests/common/fibers.cpp | 4
-rw-r--r--  src/tests/common/ring_buffer.cpp | 30
-rw-r--r--  src/tests/core/arm/arm_test_common.cpp | 145
-rw-r--r--  src/tests/core/arm/arm_test_common.h | 93
-rw-r--r--  src/video_core/CMakeLists.txt | 73
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 19
-rw-r--r--  src/video_core/cdma_pusher.cpp | 15
-rw-r--r--  src/video_core/cdma_pusher.h | 10
-rw-r--r--  src/video_core/command_classes/host1x.cpp | 19
-rw-r--r--  src/video_core/command_classes/host1x.h | 49
-rw-r--r--  src/video_core/command_classes/vic.cpp | 10
-rw-r--r--  src/video_core/compatible_formats.cpp | 145
-rw-r--r--  src/video_core/compatible_formats.h | 23
-rw-r--r--  src/video_core/delayed_destruction_ring.h | 32
-rw-r--r--  src/video_core/dirty_flags.cpp | 7
-rw-r--r--  src/video_core/dirty_flags.h | 3
-rw-r--r--  src/video_core/engines/fermi_2d.cpp | 89
-rw-r--r--  src/video_core/engines/fermi_2d.h | 331
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 26
-rw-r--r--  src/video_core/engines/kepler_compute.h | 5
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 45
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 127
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 3
-rw-r--r--  src/video_core/fence_manager.h | 17
-rw-r--r--  src/video_core/framebuffer_config.h | 31
-rw-r--r--  src/video_core/gpu.cpp | 79
-rw-r--r--  src/video_core/gpu.h | 55
-rw-r--r--  src/video_core/gpu_asynch.cpp | 86
-rw-r--r--  src/video_core/gpu_asynch.h | 47
-rw-r--r--  src/video_core/gpu_synch.cpp | 61
-rw-r--r--  src/video_core/gpu_synch.h | 41
-rw-r--r--  src/video_core/gpu_thread.cpp | 36
-rw-r--r--  src/video_core/gpu_thread.h | 10
-rw-r--r--  src/video_core/host_shaders/CMakeLists.txt | 67
-rw-r--r--  src/video_core/host_shaders/block_linear_unswizzle_2d.comp | 122
-rw-r--r--  src/video_core/host_shaders/block_linear_unswizzle_3d.comp | 125
-rw-r--r--  src/video_core/host_shaders/convert_depth_to_float.frag | 13
-rw-r--r--  src/video_core/host_shaders/convert_float_to_depth.frag | 13
-rw-r--r--  src/video_core/host_shaders/full_screen_triangle.vert | 29
-rw-r--r--  src/video_core/host_shaders/opengl_copy_bc4.comp | 70
-rw-r--r--  src/video_core/host_shaders/opengl_present.frag | 4
-rw-r--r--  src/video_core/host_shaders/opengl_present.vert | 4
-rw-r--r--  src/video_core/host_shaders/pitch_unswizzle.comp | 86
-rw-r--r--  src/video_core/host_shaders/vulkan_blit_color_float.frag | 14
-rw-r--r--  src/video_core/host_shaders/vulkan_blit_depth_stencil.frag | 16
-rw-r--r--  src/video_core/host_shaders/vulkan_present.frag (renamed from src/video_core/renderer_vulkan/shaders/blit.frag) | 9
-rw-r--r--  src/video_core/host_shaders/vulkan_present.vert (renamed from src/video_core/renderer_vulkan/shaders/blit.vert) | 9
-rw-r--r--  src/video_core/host_shaders/vulkan_quad_array.comp (renamed from src/video_core/renderer_vulkan/shaders/quad_array.comp) | 9
-rw-r--r--  src/video_core/host_shaders/vulkan_quad_indexed.comp (renamed from src/video_core/renderer_vulkan/shaders/quad_indexed.comp) | 9
-rw-r--r--  src/video_core/host_shaders/vulkan_uint8.comp (renamed from src/video_core/renderer_vulkan/shaders/uint8.comp) | 9
-rw-r--r--  src/video_core/memory_manager.cpp | 5
-rw-r--r--  src/video_core/morton.cpp | 250
-rw-r--r--  src/video_core/morton.h | 18
-rw-r--r--  src/video_core/rasterizer_interface.h | 12
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 7
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 8
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 68
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 18
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | 85
-rw-r--r--  src/video_core/renderer_opengl/gl_framebuffer_cache.h | 68
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 504
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 63
-rw-r--r--  src/video_core/renderer_opengl/gl_resource_manager.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_sampler_cache.cpp | 52
-rw-r--r--  src/video_core/renderer_opengl/gl_sampler_cache.h | 25
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.cpp | 15
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_manager.h | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.cpp | 7
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.h | 15
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.cpp | 32
-rw-r--r--  src/video_core/renderer_opengl/gl_stream_buffer.h | 19
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 1329
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 290
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h | 13
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 49
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 1
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.cpp | 224
-rw-r--r--  src/video_core/renderer_opengl/util_shaders.h | 51
-rw-r--r--  src/video_core/renderer_opengl/utils.cpp | 42
-rw-r--r--  src/video_core/renderer_opengl/utils.h | 16
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.cpp | 624
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.h | 96
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h | 12
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 44
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h | 14
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 309
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 18
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 307
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.h | 12
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 75
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 18
-rw-r--r--  src/video_core/renderer_vulkan/vk_command_pool.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_command_pool.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 337
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 16
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h | 20
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 83
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_image.cpp | 135
-rw-r--r--  src/video_core/renderer_vulkan/vk_image.h | 84
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.cpp | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.h | 32
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 22
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 24
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.h | 16
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 714
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 137
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | 158
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.h | 70
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 83
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.h | 29
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 85
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 22
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 25
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.cpp | 15
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp | 23
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 24
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h | 18
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 1475
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h | 335
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 38
-rw-r--r--  src/video_core/sampler_cache.cpp | 21
-rw-r--r--  src/video_core/sampler_cache.h | 60
-rw-r--r--  src/video_core/shader/async_shaders.cpp | 11
-rw-r--r--  src/video_core/shader/async_shaders.h | 12
-rw-r--r--  src/video_core/shader/decode.cpp | 6
-rw-r--r--  src/video_core/shader/decode/half_set.cpp | 14
-rw-r--r--  src/video_core/shader/decode/image.cpp | 11
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 56
-rw-r--r--  src/video_core/shader/node.h | 33
-rw-r--r--  src/video_core/shader/shader_ir.h | 18
-rw-r--r--  src/video_core/surface.cpp | 2
-rw-r--r--  src/video_core/surface.h | 152
-rw-r--r--  src/video_core/texture_cache/accelerated_swizzle.cpp | 70
-rw-r--r--  src/video_core/texture_cache/accelerated_swizzle.h | 45
-rw-r--r--  src/video_core/texture_cache/copy_params.h | 36
-rw-r--r--  src/video_core/texture_cache/decode_bc4.cpp | 97
-rw-r--r--  src/video_core/texture_cache/decode_bc4.h | 16
-rw-r--r--  src/video_core/texture_cache/descriptor_table.h | 82
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp | 380
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.h | 42
-rw-r--r--  src/video_core/texture_cache/formatter.cpp | 95
-rw-r--r--  src/video_core/texture_cache/formatter.h | 263
-rw-r--r--  src/video_core/texture_cache/image_base.cpp | 218
-rw-r--r--  src/video_core/texture_cache/image_base.h | 83
-rw-r--r--  src/video_core/texture_cache/image_info.cpp | 189
-rw-r--r--  src/video_core/texture_cache/image_info.h | 38
-rw-r--r--  src/video_core/texture_cache/image_view_base.cpp | 41
-rw-r--r--  src/video_core/texture_cache/image_view_base.h | 47
-rw-r--r--  src/video_core/texture_cache/image_view_info.cpp | 88
-rw-r--r--  src/video_core/texture_cache/image_view_info.h | 50
-rw-r--r--  src/video_core/texture_cache/render_targets.h | 51
-rw-r--r--  src/video_core/texture_cache/samples_helper.h | 55
-rw-r--r--  src/video_core/texture_cache/slot_vector.h | 156
-rw-r--r--  src/video_core/texture_cache/surface_base.cpp | 299
-rw-r--r--  src/video_core/texture_cache/surface_base.h | 333
-rw-r--r--  src/video_core/texture_cache/surface_params.cpp | 445
-rw-r--r--  src/video_core/texture_cache/surface_params.h | 294
-rw-r--r--  src/video_core/texture_cache/surface_view.cpp | 27
-rw-r--r--  src/video_core/texture_cache/surface_view.h | 68
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 2404
-rw-r--r--  src/video_core/texture_cache/types.h | 140
-rw-r--r--  src/video_core/texture_cache/util.cpp | 1233
-rw-r--r--  src/video_core/texture_cache/util.h | 109
-rw-r--r--  src/video_core/textures/astc.cpp | 58
-rw-r--r--  src/video_core/textures/astc.h | 5
-rw-r--r--  src/video_core/textures/convert.cpp | 93
-rw-r--r--  src/video_core/textures/convert.h | 22
-rw-r--r--  src/video_core/textures/decoders.cpp | 249
-rw-r--r--  src/video_core/textures/decoders.h | 44
-rw-r--r--  src/video_core/textures/texture.cpp | 16
-rw-r--r--  src/video_core/textures/texture.h | 239
-rw-r--r--  src/video_core/video_core.cpp | 10
-rw-r--r--  src/video_core/vulkan_common/nsight_aftermath_tracker.cpp (renamed from src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp) | 30
-rw-r--r--  src/video_core/vulkan_common/nsight_aftermath_tracker.h (renamed from src/video_core/renderer_vulkan/nsight_aftermath_tracker.h) | 5
-rw-r--r--  src/video_core/vulkan_common/vulkan_debug_callback.cpp | 45
-rw-r--r--  src/video_core/vulkan_common/vulkan_debug_callback.h | 11
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp (renamed from src/video_core/renderer_vulkan/vk_device.cpp) | 289
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.h (renamed from src/video_core/renderer_vulkan/vk_device.h) | 61
-rw-r--r--  src/video_core/vulkan_common/vulkan_instance.cpp | 151
-rw-r--r--  src/video_core/vulkan_common/vulkan_instance.h | 32
-rw-r--r--  src/video_core/vulkan_common/vulkan_library.cpp | 36
-rw-r--r--  src/video_core/vulkan_common/vulkan_library.h | 13
-rw-r--r--  src/video_core/vulkan_common/vulkan_surface.cpp | 81
-rw-r--r--  src/video_core/vulkan_common/vulkan_surface.h | 18
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.cpp (renamed from src/video_core/renderer_vulkan/wrapper.cpp) | 125
-rw-r--r--  src/video_core/vulkan_common/vulkan_wrapper.h (renamed from src/video_core/renderer_vulkan/wrapper.h) | 140
-rw-r--r--  src/yuzu/applets/controller.cpp | 2
-rw-r--r--  src/yuzu/applets/error.cpp | 6
-rw-r--r--  src/yuzu/bootmanager.cpp | 11
-rw-r--r--  src/yuzu/bootmanager.h | 2
-rw-r--r--  src/yuzu/compatdb.cpp | 2
-rw-r--r--  src/yuzu/configuration/config.cpp | 10
-rw-r--r--  src/yuzu/configuration/configure_cpu.cpp | 3
-rw-r--r--  src/yuzu/configuration/configure_cpu.ui | 12
-rw-r--r--  src/yuzu/configuration/configure_input.cpp | 13
-rw-r--r--  src/yuzu/configuration/configure_input_player.cpp | 117
-rw-r--r--  src/yuzu/configuration/configure_input_player.h | 12
-rw-r--r--  src/yuzu/configuration/configure_motion_touch.cpp | 2
-rw-r--r--  src/yuzu/main.cpp | 66
-rw-r--r--  src/yuzu/main.h | 3
-rw-r--r--  src/yuzu/util/url_request_interceptor.cpp | 2
-rw-r--r--  src/yuzu_cmd/config.cpp | 2
-rw-r--r--  src/yuzu_cmd/default_ini.h | 2
-rw-r--r--  src/yuzu_cmd/yuzu.cpp | 4
-rw-r--r--  src/yuzu_tester/config.cpp | 2
-rw-r--r--  src/yuzu_tester/default_ini.h | 2
-rw-r--r--  src/yuzu_tester/yuzu.cpp | 2
336 files changed, 13097 insertions, 10473 deletions
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh
index e46ee0abb..99e79fcb6 100644
--- a/.ci/scripts/common/post-upload.sh
+++ b/.ci/scripts/common/post-upload.sh
@@ -15,5 +15,5 @@ mv "${REV_NAME}-source.tar.xz" $RELEASE_NAME
 7z a "$REV_NAME.7z" $RELEASE_NAME
 
 # move the compiled archive into the artifacts directory to be uploaded by travis releases
-mv "$ARCHIVE_NAME" artifacts/
-mv "$REV_NAME.7z" artifacts/
+mv "$ARCHIVE_NAME" "${ARTIFACTS_DIR}/"
+mv "$REV_NAME.7z" "${ARTIFACTS_DIR}/"
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh
index 3c2fc79a2..a49e3fff3 100644
--- a/.ci/scripts/common/pre-upload.sh
+++ b/.ci/scripts/common/pre-upload.sh
@@ -2,5 +2,6 @@
 
 GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`"
 GITREV="`git show -s --format='%h'`"
+ARTIFACTS_DIR="artifacts"
 
-mkdir -p artifacts
+mkdir -p "${ARTIFACTS_DIR}/"
diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh
index e0c018cfd..30391f6ad 100755
--- a/.ci/scripts/linux/docker.sh
+++ b/.ci/scripts/linux/docker.sh
@@ -1,14 +1,54 @@
 #!/bin/bash -ex
 
+# Exit on error, rather than continuing with the rest of the script.
+set -e
+
 cd /yuzu
 
 ccache -s
 
 mkdir build || true && cd build
-cmake .. -G Ninja -DDISPLAY_VERSION=$1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON
+cmake .. -DDISPLAY_VERSION=$1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DCMAKE_INSTALL_PREFIX="/usr"
 
-ninja
+make -j$(nproc)
 
 ccache -s
 
 ctest -VV -C Release
+
+make install DESTDIR=AppDir
+rm -vf AppDir/usr/bin/yuzu-cmd AppDir/usr/bin/yuzu-tester
+
+# Download tools needed to build an AppImage
+wget -nc https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage
+wget -nc https://github.com/linuxdeploy/linuxdeploy-plugin-qt/releases/download/continuous/linuxdeploy-plugin-qt-x86_64.AppImage
+wget -nc https://github.com/AppImage/AppImageKit/releases/download/continuous/appimagetool-x86_64.AppImage
+wget -nc https://github.com/darealshinji/AppImageKit-checkrt/releases/download/continuous/AppRun-patched-x86_64
+wget -nc https://github.com/darealshinji/AppImageKit-checkrt/releases/download/continuous/exec-x86_64.so
+# Set executable bit
+chmod 755 \
+    appimagetool-x86_64.AppImage \
+    AppRun-patched-x86_64 \
+    exec-x86_64.so \
+    linuxdeploy-x86_64.AppImage \
+    linuxdeploy-plugin-qt-x86_64.AppImage
+
+# Workaround for https://github.com/AppImage/AppImageKit/issues/828
+export APPIMAGE_EXTRACT_AND_RUN=1
+
+mkdir -p AppDir/usr/optional
+mkdir -p AppDir/usr/optional/libstdc++
+mkdir -p AppDir/usr/optional/libgcc_s
+
+# Deploy yuzu's needed dependencies
+./linuxdeploy-x86_64.AppImage --appdir AppDir --plugin qt
+
+# Workaround for building yuzu with GCC 10 but also trying to distribute it to Ubuntu 18.04 et al.
+# See https://github.com/darealshinji/AppImageKit-checkrt
+cp exec-x86_64.so AppDir/usr/optional/exec.so
+cp AppRun-patched-x86_64 AppDir/AppRun
+cp --dereference /usr/lib/x86_64-linux-gnu/libstdc++.so.6 AppDir/usr/optional/libstdc++/libstdc++.so.6
+cp --dereference /lib/x86_64-linux-gnu/libgcc_s.so.1 AppDir/usr/optional/libgcc_s/libgcc_s.so.1
+
+# Build the AppImage
+./appimagetool-x86_64.AppImage AppDir
diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh
index fe4e6b2ac..7175e4cb5 100644
--- a/.ci/scripts/linux/upload.sh
+++ b/.ci/scripts/linux/upload.sh
@@ -2,6 +2,8 @@
 
 . .ci/scripts/common/pre-upload.sh
 
+APPIMAGE_NAME="yuzu-x86_64.AppImage"
+NEW_APPIMAGE_NAME="yuzu-${GITDATE}-${GITREV}-x86_64.AppImage"
 REV_NAME="yuzu-linux-${GITDATE}-${GITREV}"
 ARCHIVE_NAME="${REV_NAME}.tar.xz"
 COMPRESSION_FLAGS="-cJvf"
@@ -17,4 +19,7 @@ mkdir "$DIR_NAME"
 cp build/bin/yuzu-cmd "$DIR_NAME"
 cp build/bin/yuzu "$DIR_NAME"
 
+# Copy the AppImage to the artifacts directory and avoid compressing it
+cp "build/${APPIMAGE_NAME}" "${ARTIFACTS_DIR}/${NEW_APPIMAGE_NAME}"
+
 . .ci/scripts/common/post-upload.sh
diff --git a/.ci/templates/build-msvc.yml b/.ci/templates/build-msvc.yml
index 33ff8201f..721179550 100644
--- a/.ci/templates/build-msvc.yml
+++ b/.ci/templates/build-msvc.yml
@@ -8,7 +8,7 @@ steps:
   displayName: 'Install vulkan-sdk'
 - script: python -m pip install --upgrade pip conan
   displayName: 'Install conan'
-- script: refreshenv && mkdir build && cd build && cmake -G "Visual Studio 16 2019" -A x64 --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
+- script: refreshenv && mkdir build && cd build && cmake -G "Visual Studio 16 2019" -A x64 --config Release -DYUZU_USE_BUNDLED_QT=1 -DYUZU_USE_QT_WEB_ENGINE=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DDISPLAY_VERSION=${{ parameters['version'] }} .. && cd ..
   displayName: 'Configure CMake'
 - task: MSBuild@1
   displayName: 'Build'
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eda555494..aaf3a90cf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,10 @@ option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
 
 option(USE_DISCORD_PRESENCE "Enables Discord Rich Presence" OFF)
 
+if (NOT ENABLE_WEB_SERVICE)
+    set(YUZU_ENABLE_BOXCAT OFF)
+endif()
+
 # Default to a Release build
 get_property(IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
 if (NOT IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE)
@@ -165,7 +169,7 @@ macro(yuzu_find_packages)
165 "lz4 1.8 lz4/1.9.2" 169 "lz4 1.8 lz4/1.9.2"
166 "nlohmann_json 3.8 nlohmann_json/3.8.0" 170 "nlohmann_json 3.8 nlohmann_json/3.8.0"
167 "ZLIB 1.2 zlib/1.2.11" 171 "ZLIB 1.2 zlib/1.2.11"
168 "zstd 1.4 zstd/1.4.5" 172 "zstd 1.4 zstd/1.4.8"
169 ) 173 )
170 174
171 foreach(PACKAGE ${REQUIRED_LIBS}) 175 foreach(PACKAGE ${REQUIRED_LIBS})
@@ -239,7 +243,7 @@ if(ENABLE_QT)
     if (YUZU_USE_QT_WEB_ENGINE)
         find_package(Qt5 COMPONENTS WebEngineCore WebEngineWidgets)
     endif()
-
+
     if (ENABLE_QT_TRANSLATION)
         find_package(Qt5 REQUIRED COMPONENTS LinguistTools ${QT_PREFIX_HINT})
     endif()
@@ -322,7 +326,7 @@ if (CONAN_REQUIRED_LIBS)
         list(APPEND Boost_LIBRARIES Boost::context)
     endif()
 endif()
-
+
 # Due to issues with variable scopes in functions, we need to also find_package(qt5) outside of the function
 if(ENABLE_QT)
     list(APPEND CMAKE_MODULE_PATH "${CONAN_QT_ROOT_RELEASE}")
diff --git a/README.md b/README.md
index 981c8ef24..fbf62eb7c 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,6 @@ If you want to contribute to the user interface translation, please check out th
 
 * __Windows__: [Windows Build](https://github.com/yuzu-emu/yuzu/wiki/Building-For-Windows)
 * __Linux__: [Linux Build](https://github.com/yuzu-emu/yuzu/wiki/Building-For-Linux)
-* __macOS__: [macOS Build](https://github.com/yuzu-emu/yuzu/wiki/Building-for-macOS)
 
 
 ### Support
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject 0e1112b7df77ae55a62a51622940d5c8f9e8c84
+Subproject 3806284cbefc4115436dcdc687776a45ec31309
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a22b564d6..61adbef28 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,10 +45,15 @@ if (MSVC)
 
         # Warnings
         /W3
+        /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled
+        /we4101 # 'identifier': unreferenced local variable
+        /we4265 # 'class': class has virtual functions, but destructor is not virtual
+        /we4388 # signed/unsigned mismatch
         /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect
         /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'?
         /we4555 # Expression has no effect; expected expression with side-effect
         /we4834 # Discarding return value of function with 'nodiscard' attribute
+        /we5038 # data member 'member1' will be initialized after data member 'member2'
     )
 
     # /GS- - No stack buffer overflow checks
@@ -62,6 +67,7 @@ else()
         -Werror=implicit-fallthrough
         -Werror=missing-declarations
         -Werror=reorder
+        -Werror=uninitialized
         -Werror=unused-result
         -Wextra
         -Wmissing-declarations
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp
index 699fcb84c..3b4144e21 100644
--- a/src/audio_core/algorithm/interpolate.cpp
+++ b/src/audio_core/algorithm/interpolate.cpp
@@ -218,7 +218,7 @@ void Resample(s32* output, const s32* input, s32 pitch, s32& fraction, std::size
     const auto l2 = lut[lut_index + 2];
     const auto l3 = lut[lut_index + 3];
 
-    const auto s0 = static_cast<s32>(input[index]);
+    const auto s0 = static_cast<s32>(input[index + 0]);
     const auto s1 = static_cast<s32>(input[index + 1]);
     const auto s2 = static_cast<s32>(input[index + 2]);
     const auto s3 = static_cast<s32>(input[index + 3]);
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 179560cd7..d2ce8c814 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -11,7 +11,6 @@
 #include "audio_core/info_updater.h"
 #include "audio_core/voice_context.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/writable_event.h"
 #include "core/memory.h"
 #include "core/settings.h"
 
@@ -71,10 +70,9 @@ namespace {
 namespace AudioCore {
 AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
                              AudioCommon::AudioRendererParameter params,
-                             std::shared_ptr<Kernel::WritableEvent> buffer_event_,
+                             Stream::ReleaseCallback&& release_callback,
                              std::size_t instance_number)
-    : worker_params{params}, buffer_event{buffer_event_},
-      memory_pool_info(params.effect_count + params.voice_count * 4),
+    : worker_params{params}, memory_pool_info(params.effect_count + params.voice_count * 4),
       voice_context(params.voice_count), effect_context(params.effect_count), mix_context(),
       sink_context(params.sink_count), splitter_context(),
       voices(params.voice_count), memory{memory_},
@@ -85,10 +83,9 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
                                params.num_splitter_send_channels);
     mix_context.Initialize(behavior_info, params.submix_count + 1, params.effect_count);
     audio_out = std::make_unique<AudioCore::AudioOut>();
-    stream =
-        audio_out->OpenStream(core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
-                              fmt::format("AudioRenderer-Instance{}", instance_number),
-                              [=]() { buffer_event_->Signal(); });
+    stream = audio_out->OpenStream(
+        core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS,
+        fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback));
     audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 90f7eafa4..18567f618 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -27,10 +27,6 @@ namespace Core::Timing {
 class CoreTiming;
 }
 
-namespace Kernel {
-class WritableEvent;
-}
-
 namespace Core::Memory {
 class Memory;
 }
@@ -44,8 +40,7 @@ class AudioRenderer {
 public:
     AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_,
                   AudioCommon::AudioRendererParameter params,
-                  std::shared_ptr<Kernel::WritableEvent> buffer_event_,
-                  std::size_t instance_number);
+                  Stream::ReleaseCallback&& release_callback, std::size_t instance_number);
     ~AudioRenderer();
 
     [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params,
@@ -61,7 +56,6 @@ private:
     BehaviorInfo behavior_info{};
 
     AudioCommon::AudioRendererParameter worker_params;
-    std::shared_ptr<Kernel::WritableEvent> buffer_event;
     std::vector<ServerMemoryPoolInfo> memory_pool_info;
     VoiceContext voice_context;
     EffectContext effect_context;
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index eca296589..afe68c9ed 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -130,7 +130,11 @@ bool Stream::ContainsBuffer([[maybe_unused]] Buffer::Tag tag) const {
 std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers(std::size_t max_count) {
     std::vector<Buffer::Tag> tags;
     for (std::size_t count = 0; count < max_count && !released_buffers.empty(); ++count) {
-        tags.push_back(released_buffers.front()->GetTag());
+        if (released_buffers.front()) {
+            tags.push_back(released_buffers.front()->GetTag());
+        } else {
+            ASSERT_MSG(false, "Invalid tag in released_buffers!");
+        }
         released_buffers.pop();
     }
     return tags;
@@ -140,7 +144,11 @@ std::vector<Buffer::Tag> Stream::GetTagsAndReleaseBuffers() {
     std::vector<Buffer::Tag> tags;
     tags.reserve(released_buffers.size());
     while (!released_buffers.empty()) {
-        tags.push_back(released_buffers.front()->GetTag());
+        if (released_buffers.front()) {
+            tags.push_back(released_buffers.front()->GetTag());
+        } else {
+            ASSERT_MSG(false, "Invalid tag in released_buffers!");
+        }
         released_buffers.pop();
     }
     return tags;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 943ff996e..2c2bd2ee8 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -135,8 +135,6 @@ add_library(common STATIC
     math_util.h
     memory_detect.cpp
     memory_detect.h
-    memory_hook.cpp
-    memory_hook.h
     microprofile.cpp
     microprofile.h
     microprofileui.h
@@ -162,6 +160,8 @@ add_library(common STATIC
     thread.cpp
     thread.h
     thread_queue_list.h
+    thread_worker.cpp
+    thread_worker.h
     threadsafe_queue.h
     time_zone.cpp
     time_zone.h
diff --git a/src/common/concepts.h b/src/common/concepts.h
index 5bef3ad67..aa08065a7 100644
--- a/src/common/concepts.h
+++ b/src/common/concepts.h
@@ -31,4 +31,8 @@ concept DerivedFrom = requires {
     std::is_convertible_v<const volatile Derived*, const volatile Base*>;
 };
 
+// TODO: Replace with std::convertible_to when libc++ implements it.
+template <typename From, typename To>
+concept ConvertibleTo = std::is_convertible_v<From, To>;
+
 } // namespace Common
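A quick illustration of the new concept (a sketch, not part of the commit; the ToU64 helper is hypothetical): with C++20 terse constraint syntax, ConvertibleTo<u64> T expands to ConvertibleTo<T, u64>, so the function accepts any argument type convertible to u64 and rejects the rest at compile time.

#include <cstdint>
#include <type_traits>

// Stand-in for the concept added above, so the sketch is self-contained.
template <typename From, typename To>
concept ConvertibleTo = std::is_convertible_v<From, To>;

// Hypothetical helper: accepts ints, u32, etc.; a pointer argument fails the constraint.
template <ConvertibleTo<std::uint64_t> T>
constexpr std::uint64_t ToU64(T value) {
    return static_cast<std::uint64_t>(value);
}

static_assert(ToU64(42) == 42);
static_assert(ToU64(std::uint32_t{7}) == 7);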
diff --git a/src/common/div_ceil.h b/src/common/div_ceil.h
index 6b2c48f91..95e1489a9 100644
--- a/src/common/div_ceil.h
+++ b/src/common/div_ceil.h
@@ -11,16 +11,16 @@ namespace Common {
 
 /// Ceiled integer division.
 template <typename N, typename D>
-requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeil(
-    N number, D divisor) {
-    return (static_cast<D>(number) + divisor - 1) / divisor;
+requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeil(N number,
+                                                                                        D divisor) {
+    return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
 }
 
 /// Ceiled integer division with logarithmic divisor in base 2
 template <typename N, typename D>
-requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr auto DivCeilLog2(
+requires std::is_integral_v<N>&& std::is_unsigned_v<D>[[nodiscard]] constexpr N DivCeilLog2(
     N value, D alignment_log2) {
-    return (static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2;
+    return static_cast<N>((static_cast<D>(value) + (D(1) << alignment_log2) - 1) >> alignment_log2);
 }
 
 } // namespace Common
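Why the return type changed from auto to N (a sketch of the apparent intent, not from the commit): with auto, the quotient took the divisor's type, so a u32 numerator divided by a u64 divisor produced a u64; casting back to N keeps callers' integer widths stable.

#include <cstdint>

// Self-contained mirror of the new DivCeil signature above.
template <typename N, typename D>
constexpr N DivCeil(N number, D divisor) {
    return static_cast<N>((static_cast<D>(number) + divisor - 1) / divisor);
}

// A u32 byte count divided by a u64 page size still yields a u32 page count.
static_assert(DivCeil(std::uint32_t{4097}, std::uint64_t{4096}) == 2);
static_assert(DivCeil(std::uint32_t{4096}, std::uint64_t{4096}) == 1);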
diff --git a/src/common/memory_hook.cpp b/src/common/memory_hook.cpp
deleted file mode 100644
index 3986986d6..000000000
--- a/src/common/memory_hook.cpp
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2018 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/memory_hook.h"
-
-namespace Common {
-
-MemoryHook::~MemoryHook() = default;
-
-} // namespace Common
diff --git a/src/common/memory_hook.h b/src/common/memory_hook.h
deleted file mode 100644
index adaa4c2c5..000000000
--- a/src/common/memory_hook.h
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright 2016 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <memory>
-#include <optional>
-
-#include "common/common_types.h"
-
-namespace Common {
-
-/**
- * Memory hooks have two purposes:
- * 1. To allow reads and writes to a region of memory to be intercepted. This is used to implement
- *    texture forwarding and memory breakpoints for debugging.
- * 2. To allow for the implementation of MMIO devices.
- *
- * A hook may be mapped to multiple regions of memory.
- *
- * If a std::nullopt or false is returned from a function, the read/write request is passed through
- * to the underlying memory region.
- */
-class MemoryHook {
-public:
-    virtual ~MemoryHook();
-
-    virtual std::optional<bool> IsValidAddress(VAddr addr) = 0;
-
-    virtual std::optional<u8> Read8(VAddr addr) = 0;
-    virtual std::optional<u16> Read16(VAddr addr) = 0;
-    virtual std::optional<u32> Read32(VAddr addr) = 0;
-    virtual std::optional<u64> Read64(VAddr addr) = 0;
-
-    virtual bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) = 0;
-
-    virtual bool Write8(VAddr addr, u8 data) = 0;
-    virtual bool Write16(VAddr addr, u16 data) = 0;
-    virtual bool Write32(VAddr addr, u32 data) = 0;
-    virtual bool Write64(VAddr addr, u64 data) = 0;
-
-    virtual bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) = 0;
-};
-
-using MemoryHookPointer = std::shared_ptr<MemoryHook>;
-} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index bccea0894..8fd8620fd 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -10,16 +10,10 @@ PageTable::PageTable() = default;
 
 PageTable::~PageTable() noexcept = default;
 
-void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits,
-                       bool has_attribute) {
-    const std::size_t num_page_table_entries{1ULL
-                                             << (address_space_width_in_bits - page_size_in_bits)};
+void PageTable::Resize(size_t address_space_width_in_bits, size_t page_size_in_bits) {
+    const size_t num_page_table_entries{1ULL << (address_space_width_in_bits - page_size_in_bits)};
     pointers.resize(num_page_table_entries);
     backing_addr.resize(num_page_table_entries);
-
-    if (has_attribute) {
-        attributes.resize(num_page_table_entries);
-    }
 }
 
 } // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 9754fabf9..61c5552e0 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -4,10 +4,10 @@
 
 #pragma once
 
+#include <atomic>
 #include <tuple>
 
 #include "common/common_types.h"
-#include "common/memory_hook.h"
 #include "common/virtual_buffer.h"
 
 namespace Common {
@@ -20,27 +20,6 @@ enum class PageType : u8 {
     /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
     /// invalidation
     RasterizerCachedMemory,
-    /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
-    Special,
-    /// Page is allocated for use.
-    Allocated,
-};
-
-struct SpecialRegion {
-    enum class Type {
-        DebugHook,
-        IODevice,
-    } type;
-
-    MemoryHookPointer handler;
-
-    [[nodiscard]] bool operator<(const SpecialRegion& other) const {
-        return std::tie(type, handler) < std::tie(other.type, other.handler);
-    }
-
-    [[nodiscard]] bool operator==(const SpecialRegion& other) const {
-        return std::tie(type, handler) == std::tie(other.type, other.handler);
-    }
 };
 
 /**
@@ -48,6 +27,59 @@ struct SpecialRegion {
  * mimics the way a real CPU page table works.
  */
 struct PageTable {
+    /// Number of bits reserved for attribute tagging.
+    /// This can be at most the guaranteed alignment of the pointers in the page table.
+    static constexpr int ATTRIBUTE_BITS = 2;
+
+    /**
+     * Pair of host pointer and page type attribute.
+     * This uses the lower bits of a given pointer to store the attribute tag.
+     * Writing and reading the pointer attribute pair is guaranteed to be atomic for the same method
+     * call. In other words, they are guaranteed to be synchronized at all times.
+     */
+    class PageInfo {
+    public:
+        /// Returns the page pointer
+        [[nodiscard]] u8* Pointer() const noexcept {
+            return ExtractPointer(raw.load(std::memory_order_relaxed));
+        }
+
+        /// Returns the page type attribute
+        [[nodiscard]] PageType Type() const noexcept {
+            return ExtractType(raw.load(std::memory_order_relaxed));
+        }
+
+        /// Returns the page pointer and attribute pair, extracted from the same atomic read
+        [[nodiscard]] std::pair<u8*, PageType> PointerType() const noexcept {
+            const uintptr_t non_atomic_raw = raw.load(std::memory_order_relaxed);
+            return {ExtractPointer(non_atomic_raw), ExtractType(non_atomic_raw)};
+        }
+
+        /// Returns the raw representation of the page information.
+        /// Use ExtractPointer and ExtractType to unpack the value.
+        [[nodiscard]] uintptr_t Raw() const noexcept {
+            return raw.load(std::memory_order_relaxed);
+        }
+
+        /// Write a page pointer and type pair atomically
+        void Store(u8* pointer, PageType type) noexcept {
+            raw.store(reinterpret_cast<uintptr_t>(pointer) | static_cast<uintptr_t>(type));
+        }
+
+        /// Unpack a pointer from a page info raw representation
+        [[nodiscard]] static u8* ExtractPointer(uintptr_t raw) noexcept {
+            return reinterpret_cast<u8*>(raw & (~uintptr_t{0} << ATTRIBUTE_BITS));
+        }
+
+        /// Unpack a page type from a page info raw representation
+        [[nodiscard]] static PageType ExtractType(uintptr_t raw) noexcept {
+            return static_cast<PageType>(raw & ((uintptr_t{1} << ATTRIBUTE_BITS) - 1));
+        }
+
+    private:
+        std::atomic<uintptr_t> raw;
+    };
+
     PageTable();
     ~PageTable() noexcept;
 
@@ -58,25 +90,21 @@ struct PageTable {
     PageTable& operator=(PageTable&&) noexcept = default;
 
     /**
-     * Resizes the page table to be able to accomodate enough pages within
+     * Resizes the page table to be able to accommodate enough pages within
      * a given address space.
      *
      * @param address_space_width_in_bits The address size width in bits.
     * @param page_size_in_bits The page size in bits.
-     * @param has_attribute Whether or not this page has any backing attributes.
      */
-    void Resize(std::size_t address_space_width_in_bits, std::size_t page_size_in_bits,
-                bool has_attribute);
+    void Resize(size_t address_space_width_in_bits, size_t page_size_in_bits);
 
     /**
      * Vector of memory pointers backing each page. An entry can only be non-null if the
-     * corresponding entry in the `attributes` vector is of type `Memory`.
+     * corresponding attribute element is of type `Memory`.
      */
-    VirtualBuffer<u8*> pointers;
+    VirtualBuffer<PageInfo> pointers;
 
     VirtualBuffer<u64> backing_addr;
-
-    VirtualBuffer<PageType> attributes;
 };
 
 } // namespace Common
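A minimal sketch of the pointer-tagging idea PageInfo relies on (illustrative only, not part of the commit): page pointers are at least 4-byte aligned, so the low ATTRIBUTE_BITS of the raw word are free to carry the PageType, while masking those bits off recovers the exact pointer.

#include <cstdint>

enum class PageType : std::uint8_t { Unmapped = 0, Memory = 1, RasterizerCachedMemory = 2 };

int main() {
    alignas(4) static std::uint8_t page[4096];
    // Pack: the pointer's low two bits are zero, so OR in the type tag.
    const std::uintptr_t raw =
        reinterpret_cast<std::uintptr_t>(&page[0]) | static_cast<std::uintptr_t>(PageType::Memory);
    // Unpack: mask off the low two bits for the pointer, keep them for the type.
    auto* pointer = reinterpret_cast<std::uint8_t*>(raw & ~std::uintptr_t{3});
    const auto type = static_cast<PageType>(raw & std::uintptr_t{3});
    return (pointer == &page[0] && type == PageType::Memory) ? 0 : 1;
}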
diff --git a/src/common/swap.h b/src/common/swap.h
index 7665942a2..a80e191dc 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -394,7 +394,7 @@ public:
     template <typename S, typename T2, typename F2>
     friend S operator%(const S& p, const swapped_t v);
 
-    // Arithmetics + assignements
+    // Arithmetics + assignments
     template <typename S, typename T2, typename F2>
     friend S operator+=(const S& p, const swapped_t v);
 
@@ -451,7 +451,7 @@ S operator%(const S& i, const swap_struct_t<T, F> v) {
     return i % v.swap();
 }
 
-// Arithmetics + assignements
+// Arithmetics + assignments
 template <typename S, typename T, typename F>
 S& operator+=(S& i, const swap_struct_t<T, F> v) {
     i += v.swap();
diff --git a/src/common/thread_worker.cpp b/src/common/thread_worker.cpp
new file mode 100644
index 000000000..8f9bf447a
--- /dev/null
+++ b/src/common/thread_worker.cpp
@@ -0,0 +1,58 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/thread.h"
+#include "common/thread_worker.h"
+
+namespace Common {
+
+ThreadWorker::ThreadWorker(std::size_t num_workers, const std::string& name) {
+    for (std::size_t i = 0; i < num_workers; ++i)
+        threads.emplace_back([this, thread_name{std::string{name}}] {
+            Common::SetCurrentThreadName(thread_name.c_str());
+
+            // Wait for first request
+            {
+                std::unique_lock lock{queue_mutex};
+                condition.wait(lock, [this] { return stop || !requests.empty(); });
+            }
+
+            while (true) {
+                std::function<void()> task;
+
+                {
+                    std::unique_lock lock{queue_mutex};
+                    condition.wait(lock, [this] { return stop || !requests.empty(); });
+                    if (stop || requests.empty()) {
+                        return;
+                    }
+                    task = std::move(requests.front());
+                    requests.pop();
+                }
+
+                task();
+            }
+        });
+}
+
+ThreadWorker::~ThreadWorker() {
+    {
+        std::unique_lock lock{queue_mutex};
+        stop = true;
+    }
+    condition.notify_all();
+    for (std::thread& thread : threads) {
+        thread.join();
+    }
+}
+
+void ThreadWorker::QueueWork(std::function<void()>&& work) {
+    {
+        std::unique_lock lock{queue_mutex};
+        requests.emplace(work);
+    }
+    condition.notify_one();
+}
+
+} // namespace Common
diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h
new file mode 100644
index 000000000..f1859971f
--- /dev/null
+++ b/src/common/thread_worker.h
@@ -0,0 +1,30 @@
+// Copyright 2020 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <functional>
+#include <mutex>
+#include <string>
+#include <vector>
+#include <queue>
+
+namespace Common {
+
+class ThreadWorker final {
+public:
+    explicit ThreadWorker(std::size_t num_workers, const std::string& name);
+    ~ThreadWorker();
+    void QueueWork(std::function<void()>&& work);
+
+private:
+    std::vector<std::thread> threads;
+    std::queue<std::function<void()>> requests;
+    std::mutex queue_mutex;
+    std::condition_variable condition;
+    std::atomic_bool stop{};
+};
+
+} // namespace Common
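A hypothetical usage sketch for the new worker pool (names and counts are illustrative, not from the commit). Note that the destructor only sets stop and joins; per the loop in thread_worker.cpp, work still queued when stop is observed may never run, so callers that need completion must track it themselves:

#include <atomic>
#include <cstdio>

#include "common/thread_worker.h"

int main() {
    std::atomic<int> done{0};
    {
        Common::ThreadWorker pool{4, "yuzu:ExamplePool"};
        for (int i = 0; i < 16; ++i) {
            pool.QueueWork([&done] { done.fetch_add(1, std::memory_order_relaxed); });
        }
    } // ~ThreadWorker: sets stop, wakes every worker, joins the threads.
    std::printf("completed %d tasks\n", done.load());
    return 0;
}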
diff --git a/src/common/virtual_buffer.h b/src/common/virtual_buffer.h
index 91d430036..fb1a6f81f 100644
--- a/src/common/virtual_buffer.h
+++ b/src/common/virtual_buffer.h
@@ -15,10 +15,12 @@ void FreeMemoryPages(void* base, std::size_t size) noexcept;
 template <typename T>
 class VirtualBuffer final {
 public:
-    static_assert(
-        std::is_trivially_constructible_v<T>,
-        "T must be trivially constructible, as non-trivial constructors will not be executed "
-        "with the current allocator");
+    // TODO: Uncomment this and change Common::PageTable::PageInfo to be trivially constructible
+    // using std::atomic_ref once libc++ has support for it
+    // static_assert(
+    //     std::is_trivially_constructible_v<T>,
+    //     "T must be trivially constructible, as non-trivial constructors will not be executed "
+    //     "with the current allocator");
 
     constexpr VirtualBuffer() = default;
     explicit VirtualBuffer(std::size_t count) : alloc_size{count * sizeof(T)} {
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 59bd3d2a6..893df433a 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -202,6 +202,8 @@ add_library(core STATIC
     hle/kernel/server_port.h
     hle/kernel/server_session.cpp
     hle/kernel/server_session.h
+    hle/kernel/service_thread.cpp
+    hle/kernel/service_thread.h
     hle/kernel/session.cpp
     hle/kernel/session.h
     hle/kernel/shared_memory.cpp
@@ -500,7 +502,6 @@ add_library(core STATIC
     hle/service/sm/controller.h
     hle/service/sm/sm.cpp
     hle/service/sm/sm.h
-    hle/service/sockets/blocking_worker.h
     hle/service/sockets/bsd.cpp
     hle/service/sockets/bsd.h
     hle/service/sockets/ethc.cpp
@@ -634,6 +635,8 @@ if (MSVC)
         /we4267
         # 'context' : truncation from 'type1' to 'type2'
         /we4305
+        # 'function' : not all control paths return a value
+        /we4715
     )
 else()
     target_compile_options(core PRIVATE
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index e9c74b1a6..6c4c8e9e4 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -71,15 +71,8 @@ public:
     }
 
     void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
-        switch (exception) {
-        case Dynarmic::A32::Exception::UndefinedInstruction:
-        case Dynarmic::A32::Exception::UnpredictableInstruction:
-            break;
-        case Dynarmic::A32::Exception::Breakpoint:
-            break;
-        }
         LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
-                     static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
+                     exception, pc, MemoryReadCode(pc));
         UNIMPLEMENTED();
     }
 
@@ -133,6 +126,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
     config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
         page_table.pointers.data());
     config.absolute_offset_page_table = true;
+    config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
     config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
     config.only_detect_misalignment_via_page_table_on_page_boundary = true;
 
@@ -180,6 +174,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
         if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
             config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
         }
+        if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
+        }
     }
 
     return std::make_unique<Dynarmic::A32::Jit>(config);
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 7a4eb88a2..4c5ebca22 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -152,6 +152,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
152 // Memory 152 // Memory
153 config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); 153 config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
154 config.page_table_address_space_bits = address_space_bits; 154 config.page_table_address_space_bits = address_space_bits;
155 config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
155 config.silently_mirror_page_table = false; 156 config.silently_mirror_page_table = false;
156 config.absolute_offset_page_table = true; 157 config.absolute_offset_page_table = true;
157 config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; 158 config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
@@ -211,6 +212,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
211 if (Settings::values.cpuopt_unsafe_reduce_fp_error) { 212 if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
212 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; 213 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
213 } 214 }
215 if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
216 config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
217 }
214 } 218 }
215 219
216 return std::make_shared<Dynarmic::A64::Jit>(config); 220 return std::make_shared<Dynarmic::A64::Jit>(config);
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 0961c0819..1a2002dec 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -159,7 +159,7 @@ struct System::Impl {
159 device_memory = std::make_unique<Core::DeviceMemory>(); 159 device_memory = std::make_unique<Core::DeviceMemory>();
160 160
161 is_multicore = Settings::values.use_multi_core.GetValue(); 161 is_multicore = Settings::values.use_multi_core.GetValue();
162 is_async_gpu = is_multicore || Settings::values.use_asynchronous_gpu_emulation.GetValue(); 162 is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue();
163 163
164 kernel.SetMulticore(is_multicore); 164 kernel.SetMulticore(is_multicore);
165 cpu_manager.SetMulticore(is_multicore); 165 cpu_manager.SetMulticore(is_multicore);
@@ -307,7 +307,6 @@ struct System::Impl {
307 service_manager.reset(); 307 service_manager.reset();
308 cheat_engine.reset(); 308 cheat_engine.reset();
309 telemetry_session.reset(); 309 telemetry_session.reset();
310 device_memory.reset();
311 310
312 // Close all CPU/threading state 311 // Close all CPU/threading state
313 cpu_manager.Shutdown(); 312 cpu_manager.Shutdown();
diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp
index da15f764a..cebe2ce37 100644
--- a/src/core/crypto/key_manager.cpp
+++ b/src/core/crypto/key_manager.cpp
@@ -143,6 +143,7 @@ u64 GetSignatureTypeDataSize(SignatureType type) {
143 return 0x3C; 143 return 0x3C;
144 } 144 }
145 UNREACHABLE(); 145 UNREACHABLE();
146 return 0;
146} 147}
147 148
148u64 GetSignatureTypePaddingSize(SignatureType type) { 149u64 GetSignatureTypePaddingSize(SignatureType type) {
@@ -157,6 +158,7 @@ u64 GetSignatureTypePaddingSize(SignatureType type) {
157 return 0x40; 158 return 0x40;
158 } 159 }
159 UNREACHABLE(); 160 UNREACHABLE();
161 return 0;
160} 162}
161 163
162SignatureType Ticket::GetSignatureType() const { 164SignatureType Ticket::GetSignatureType() const {
@@ -169,8 +171,7 @@ SignatureType Ticket::GetSignatureType() const {
169 if (const auto* ticket = std::get_if<ECDSATicket>(&data)) { 171 if (const auto* ticket = std::get_if<ECDSATicket>(&data)) {
170 return ticket->sig_type; 172 return ticket->sig_type;
171 } 173 }
172 174 throw std::bad_variant_access{};
173 UNREACHABLE();
174} 175}
175 176
176TicketData& Ticket::GetData() { 177TicketData& Ticket::GetData() {
@@ -183,8 +184,7 @@ TicketData& Ticket::GetData() {
183 if (auto* ticket = std::get_if<ECDSATicket>(&data)) { 184 if (auto* ticket = std::get_if<ECDSATicket>(&data)) {
184 return ticket->data; 185 return ticket->data;
185 } 186 }
186 187 throw std::bad_variant_access{};
187 UNREACHABLE();
188} 188}
189 189
190const TicketData& Ticket::GetData() const { 190const TicketData& Ticket::GetData() const {
@@ -197,8 +197,7 @@ const TicketData& Ticket::GetData() const {
197 if (const auto* ticket = std::get_if<ECDSATicket>(&data)) { 197 if (const auto* ticket = std::get_if<ECDSATicket>(&data)) {
198 return ticket->data; 198 return ticket->data;
199 } 199 }
200 200 throw std::bad_variant_access{};
201 UNREACHABLE();
202} 201}
203 202
204u64 Ticket::GetSize() const { 203u64 Ticket::GetSize() const {
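The key_manager changes replace a trailing UNREACHABLE() with throw std::bad_variant_access{} once every std::get_if alternative has been tried, which both matches std::get's failure behavior and gives these functions a terminating statement on every control path. A condensed sketch of the pattern, with hypothetical ticket types (not yuzu code):

    #include <variant>

    struct RSATicket { int data; };
    struct ECDSATicket { int data; };
    using TicketVariant = std::variant<RSATicket, ECDSATicket>;

    int GetData(const TicketVariant& ticket) {
        if (const auto* rsa = std::get_if<RSATicket>(&ticket)) {
            return rsa->data;
        }
        if (const auto* ecdsa = std::get_if<ECDSATicket>(&ticket)) {
            return ecdsa->data;
        }
        // Every alternative exhausted: fail the same way std::get would.
        throw std::bad_variant_access{};
    }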
diff --git a/src/core/file_sys/nca_patch.cpp b/src/core/file_sys/nca_patch.cpp
index adcf0732f..a65ec6798 100644
--- a/src/core/file_sys/nca_patch.cpp
+++ b/src/core/file_sys/nca_patch.cpp
@@ -51,8 +51,8 @@ std::pair<std::size_t, std::size_t> SearchBucketEntry(u64 offset, const BlockTyp
51 low = mid + 1; 51 low = mid + 1;
52 } 52 }
53 } 53 }
54
55 UNREACHABLE_MSG("Offset could not be found in BKTR block."); 54 UNREACHABLE_MSG("Offset could not be found in BKTR block.");
55 return {0, 0};
56} 56}
57} // Anonymous namespace 57} // Anonymous namespace
58 58
diff --git a/src/core/file_sys/registered_cache.cpp b/src/core/file_sys/registered_cache.cpp
index da01002d5..431302f55 100644
--- a/src/core/file_sys/registered_cache.cpp
+++ b/src/core/file_sys/registered_cache.cpp
@@ -105,7 +105,8 @@ ContentRecordType GetCRTypeFromNCAType(NCAContentType type) {
105 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal. 105 // TODO(DarkLordZach): Peek at NCA contents to differentiate Manual and Legal.
106 return ContentRecordType::HtmlDocument; 106 return ContentRecordType::HtmlDocument;
107 default: 107 default:
108 UNREACHABLE_MSG("Invalid NCAContentType={:02X}", static_cast<u8>(type)); 108 UNREACHABLE_MSG("Invalid NCAContentType={:02X}", type);
109 return ContentRecordType{};
109 } 110 }
110} 111}
111 112
diff --git a/src/core/file_sys/registered_cache.h b/src/core/file_sys/registered_cache.h
index 5b414b0f0..b08a1687a 100644
--- a/src/core/file_sys/registered_cache.h
+++ b/src/core/file_sys/registered_cache.h
@@ -67,18 +67,18 @@ public:
67 virtual void Refresh() = 0; 67 virtual void Refresh() = 0;
68 68
69 virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0; 69 virtual bool HasEntry(u64 title_id, ContentRecordType type) const = 0;
70 virtual bool HasEntry(ContentProviderEntry entry) const; 70 bool HasEntry(ContentProviderEntry entry) const;
71 71
72 virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0; 72 virtual std::optional<u32> GetEntryVersion(u64 title_id) const = 0;
73 73
74 virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0; 74 virtual VirtualFile GetEntryUnparsed(u64 title_id, ContentRecordType type) const = 0;
75 virtual VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const; 75 VirtualFile GetEntryUnparsed(ContentProviderEntry entry) const;
76 76
77 virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0; 77 virtual VirtualFile GetEntryRaw(u64 title_id, ContentRecordType type) const = 0;
78 virtual VirtualFile GetEntryRaw(ContentProviderEntry entry) const; 78 VirtualFile GetEntryRaw(ContentProviderEntry entry) const;
79 79
80 virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0; 80 virtual std::unique_ptr<NCA> GetEntry(u64 title_id, ContentRecordType type) const = 0;
81 virtual std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const; 81 std::unique_ptr<NCA> GetEntry(ContentProviderEntry entry) const;
82 82
83 virtual std::vector<ContentProviderEntry> ListEntries() const; 83 virtual std::vector<ContentProviderEntry> ListEntries() const;
84 84
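The overloads dropped from virtual here are the ContentProviderEntry convenience forms: they only unpack the entry and forward to the pure-virtual (title_id, type) overloads, so no subclass ever needs to override them. A trimmed-down sketch of that interface shape (hypothetical types, not the real header):

    #include <cstdint>

    enum class ContentRecordType : std::uint8_t { Program, Meta };
    struct ContentProviderEntry {
        std::uint64_t title_id;
        ContentRecordType type;
    };

    class ContentProvider {
    public:
        virtual ~ContentProvider() = default;

        // Customization point: implemented by each concrete provider.
        virtual bool HasEntry(std::uint64_t title_id, ContentRecordType type) const = 0;

        // Convenience form: pure forwarding, so it need not be virtual.
        bool HasEntry(ContentProviderEntry entry) const {
            return HasEntry(entry.title_id, entry.type);
        }
    };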
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index e75e80ad0..83decf6cf 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -46,43 +46,6 @@ void SessionRequestHandler::ClientDisconnected(
46 boost::range::remove_erase(connected_sessions, server_session); 46 boost::range::remove_erase(connected_sessions, server_session);
47} 47}
48 48
49std::shared_ptr<WritableEvent> HLERequestContext::SleepClientThread(
50 const std::string& reason, u64 timeout, WakeupCallback&& callback,
51 std::shared_ptr<WritableEvent> writable_event) {
52 // Put the client thread to sleep until the wait event is signaled or the timeout expires.
53
54 if (!writable_event) {
55 // Create event if not provided
56 const auto pair = WritableEvent::CreateEventPair(kernel, "HLE Pause Event: " + reason);
57 writable_event = pair.writable;
58 }
59
60 Handle event_handle = InvalidHandle;
61 {
62 KScopedSchedulerLockAndSleep lock(kernel, event_handle, thread.get(), timeout);
63 thread->SetHLECallback(
64 [context = *this, callback](std::shared_ptr<Thread> thread) mutable -> bool {
65 ThreadWakeupReason reason = thread->GetSignalingResult() == RESULT_TIMEOUT
66 ? ThreadWakeupReason::Timeout
67 : ThreadWakeupReason::Signal;
68 callback(thread, context, reason);
69 context.WriteToOutgoingCommandBuffer(*thread);
70 return true;
71 });
72 const auto readable_event{writable_event->GetReadableEvent()};
73 writable_event->Clear();
74 thread->SetHLESyncObject(readable_event.get());
75 thread->SetStatus(ThreadStatus::WaitHLEEvent);
76 thread->SetSynchronizationResults(nullptr, RESULT_TIMEOUT);
77 readable_event->AddWaitingThread(thread);
78 }
79 thread->SetHLETimeEvent(event_handle);
80
81 is_thread_waiting = true;
82
83 return writable_event;
84}
85
86HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory, 49HLERequestContext::HLERequestContext(KernelCore& kernel, Core::Memory::Memory& memory,
87 std::shared_ptr<ServerSession> server_session, 50 std::shared_ptr<ServerSession> server_session,
88 std::shared_ptr<Thread> thread) 51 std::shared_ptr<Thread> thread)
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h
index c31a65476..b112e1ebd 100644
--- a/src/core/hle/kernel/hle_ipc.h
+++ b/src/core/hle/kernel/hle_ipc.h
@@ -129,23 +129,6 @@ public:
129 using WakeupCallback = std::function<void( 129 using WakeupCallback = std::function<void(
130 std::shared_ptr<Thread> thread, HLERequestContext& context, ThreadWakeupReason reason)>; 130 std::shared_ptr<Thread> thread, HLERequestContext& context, ThreadWakeupReason reason)>;
131 131
132 /**
133 * Puts the specified guest thread to sleep until the returned event is signaled or until the
134 * specified timeout expires.
135 * @param reason Reason for pausing the thread, to be used for debugging purposes.
136 * @param timeout Timeout in nanoseconds after which the thread will be awoken and the callback
137 * invoked with a Timeout reason.
138 * @param callback Callback to be invoked when the thread is resumed. This callback must write
139 * the entire command response once again, regardless of the state of it before this function
140 * was called.
141 * @param writable_event Event to use to wake up the thread. If unspecified, an event will be
142 * created.
143 * @returns Event that when signaled will resume the thread and call the callback function.
144 */
145 std::shared_ptr<WritableEvent> SleepClientThread(
146 const std::string& reason, u64 timeout, WakeupCallback&& callback,
147 std::shared_ptr<WritableEvent> writable_event = nullptr);
148
149 /// Populates this context with data from the requesting process/thread. 132 /// Populates this context with data from the requesting process/thread.
150 ResultCode PopulateFromIncomingCommandBuffer(const HandleTable& handle_table, 133 ResultCode PopulateFromIncomingCommandBuffer(const HandleTable& handle_table,
151 u32_le* src_cmdbuf); 134 u32_le* src_cmdbuf);
diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h
index 01a577d0c..99fb8fe93 100644
--- a/src/core/hle/kernel/k_priority_queue.h
+++ b/src/core/hle/kernel/k_priority_queue.h
@@ -8,11 +8,13 @@
8#pragma once 8#pragma once
9 9
10#include <array> 10#include <array>
11#include <concepts>
11 12
12#include "common/assert.h" 13#include "common/assert.h"
13#include "common/bit_set.h" 14#include "common/bit_set.h"
14#include "common/bit_util.h" 15#include "common/bit_util.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/concepts.h"
16 18
17namespace Kernel { 19namespace Kernel {
18 20
@@ -21,7 +23,7 @@ class Thread;
21template <typename T> 23template <typename T>
22concept KPriorityQueueAffinityMask = !std::is_reference_v<T> && requires(T & t) { 24concept KPriorityQueueAffinityMask = !std::is_reference_v<T> && requires(T & t) {
23 { t.GetAffinityMask() } 25 { t.GetAffinityMask() }
24 ->std::convertible_to<u64>; 26 ->Common::ConvertibleTo<u64>;
25 {t.SetAffinityMask(std::declval<u64>())}; 27 {t.SetAffinityMask(std::declval<u64>())};
26 28
27 { t.GetAffinity(std::declval<int32_t>()) } 29 { t.GetAffinity(std::declval<int32_t>()) }
@@ -48,9 +50,9 @@ concept KPriorityQueueMember = !std::is_reference_v<T> && requires(T & t) {
48 ->KPriorityQueueAffinityMask; 50 ->KPriorityQueueAffinityMask;
49 51
50 { t.GetActiveCore() } 52 { t.GetActiveCore() }
51 ->std::convertible_to<s32>; 53 ->Common::ConvertibleTo<s32>;
52 { t.GetPriority() } 54 { t.GetPriority() }
53 ->std::convertible_to<s32>; 55 ->Common::ConvertibleTo<s32>;
54}; 56};
55 57
56template <typename Member, size_t _NumCores, int LowestPriority, int HighestPriority> 58template <typename Member, size_t _NumCores, int LowestPriority, int HighestPriority>
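Swapping std::convertible_to for Common::ConvertibleTo keeps these requires-clauses building on toolchains whose standard library does not yet ship a usable <concepts> header. A plausible shim (an assumption about what common/concepts.h contains, not a quote of it):

    #include <type_traits>

    namespace Common {

    // Like std::convertible_to, minus the explicit-conversion check the
    // standard concept also performs; sufficient for these requires-clauses.
    template <typename From, typename To>
    concept ConvertibleTo = std::is_convertible_v<From, To>;

    } // namespace Common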
diff --git a/src/core/hle/kernel/k_scheduler_lock.h b/src/core/hle/kernel/k_scheduler_lock.h
index 2d675b39e..2f1c1f691 100644
--- a/src/core/hle/kernel/k_scheduler_lock.h
+++ b/src/core/hle/kernel/k_scheduler_lock.h
@@ -10,6 +10,7 @@
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/spin_lock.h" 11#include "common/spin_lock.h"
12#include "core/hardware_properties.h" 12#include "core/hardware_properties.h"
13#include "core/hle/kernel/kernel.h"
13 14
14namespace Kernel { 15namespace Kernel {
15 16
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 04cae3a43..e8ece8164 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -8,13 +8,14 @@
8#include <functional> 8#include <functional>
9#include <memory> 9#include <memory>
10#include <thread> 10#include <thread>
11#include <unordered_map> 11#include <unordered_set>
12#include <utility> 12#include <utility>
13 13
14#include "common/assert.h" 14#include "common/assert.h"
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "common/microprofile.h" 16#include "common/microprofile.h"
17#include "common/thread.h" 17#include "common/thread.h"
18#include "common/thread_worker.h"
18#include "core/arm/arm_interface.h" 19#include "core/arm/arm_interface.h"
19#include "core/arm/cpu_interrupt_handler.h" 20#include "core/arm/cpu_interrupt_handler.h"
20#include "core/arm/exclusive_monitor.h" 21#include "core/arm/exclusive_monitor.h"
@@ -35,6 +36,7 @@
35#include "core/hle/kernel/physical_core.h" 36#include "core/hle/kernel/physical_core.h"
36#include "core/hle/kernel/process.h" 37#include "core/hle/kernel/process.h"
37#include "core/hle/kernel/resource_limit.h" 38#include "core/hle/kernel/resource_limit.h"
39#include "core/hle/kernel/service_thread.h"
38#include "core/hle/kernel/shared_memory.h" 40#include "core/hle/kernel/shared_memory.h"
39#include "core/hle/kernel/synchronization.h" 41#include "core/hle/kernel/synchronization.h"
40#include "core/hle/kernel/thread.h" 42#include "core/hle/kernel/thread.h"
@@ -60,6 +62,8 @@ struct KernelCore::Impl {
60 RegisterHostThread(); 62 RegisterHostThread();
61 63
62 global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel); 64 global_scheduler_context = std::make_unique<Kernel::GlobalSchedulerContext>(kernel);
65 service_thread_manager =
66 std::make_unique<Common::ThreadWorker>(1, "yuzu:ServiceThreadManager");
63 67
64 InitializePhysicalCores(); 68 InitializePhysicalCores();
65 InitializeSystemResourceLimit(kernel); 69 InitializeSystemResourceLimit(kernel);
@@ -76,6 +80,12 @@ struct KernelCore::Impl {
76 } 80 }
77 81
78 void Shutdown() { 82 void Shutdown() {
83 process_list.clear();
84
 85 // Ensures all service threads shut down gracefully
86 service_thread_manager.reset();
87 service_threads.clear();
88
79 next_object_id = 0; 89 next_object_id = 0;
80 next_kernel_process_id = Process::InitialKIPIDMin; 90 next_kernel_process_id = Process::InitialKIPIDMin;
81 next_user_process_id = Process::ProcessIDMin; 91 next_user_process_id = Process::ProcessIDMin;
@@ -89,8 +99,6 @@ struct KernelCore::Impl {
89 99
90 cores.clear(); 100 cores.clear();
91 101
92 process_list.clear();
93
94 current_process = nullptr; 102 current_process = nullptr;
95 103
96 system_resource_limit = nullptr; 104 system_resource_limit = nullptr;
@@ -103,10 +111,8 @@ struct KernelCore::Impl {
103 111
104 exclusive_monitor.reset(); 112 exclusive_monitor.reset();
105 113
106 num_host_threads = 0; 114 // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
107 std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(), 115 next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
108 std::thread::id{});
109 std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
110 } 116 }
111 117
112 void InitializePhysicalCores() { 118 void InitializePhysicalCores() {
@@ -186,52 +192,46 @@ struct KernelCore::Impl {
186 } 192 }
187 } 193 }
188 194
 195 /// Creates a new host thread ID; should only be called by GetHostThreadId
196 u32 AllocateHostThreadId(std::optional<std::size_t> core_id) {
197 if (core_id) {
 198 // The first four slots are reserved for CPU core threads
199 ASSERT(*core_id < Core::Hardware::NUM_CPU_CORES);
200 return static_cast<u32>(*core_id);
201 } else {
202 return next_host_thread_id++;
203 }
204 }
205
 206 /// Gets the host thread ID for the caller, allocating a new one the first time it is called on this thread
207 u32 GetHostThreadId(std::optional<std::size_t> core_id = std::nullopt) {
208 const thread_local auto host_thread_id{AllocateHostThreadId(core_id)};
209 return host_thread_id;
210 }
211
212 /// Registers a CPU core thread by allocating a host thread ID for it
189 void RegisterCoreThread(std::size_t core_id) { 213 void RegisterCoreThread(std::size_t core_id) {
190 const std::thread::id this_id = std::this_thread::get_id(); 214 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
215 const auto this_id = GetHostThreadId(core_id);
191 if (!is_multicore) { 216 if (!is_multicore) {
192 single_core_thread_id = this_id; 217 single_core_thread_id = this_id;
193 } 218 }
194 const auto end =
195 register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
196 const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
197 ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
198 ASSERT(it == end);
199 InsertHostThread(static_cast<u32>(core_id));
200 } 219 }
201 220
221 /// Registers a new host thread by allocating a host thread ID for it
202 void RegisterHostThread() { 222 void RegisterHostThread() {
203 const std::thread::id this_id = std::this_thread::get_id(); 223 [[maybe_unused]] const auto this_id = GetHostThreadId();
204 const auto end =
205 register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
206 const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
207 if (it == end) {
208 InsertHostThread(registered_thread_ids++);
209 }
210 }
211
212 void InsertHostThread(u32 value) {
213 const size_t index = num_host_threads++;
214 ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads");
215 register_host_thread_values[index] = value;
216 register_host_thread_keys[index] = std::this_thread::get_id();
217 } 224 }
218 225
219 [[nodiscard]] u32 GetCurrentHostThreadID() const { 226 [[nodiscard]] u32 GetCurrentHostThreadID() {
220 const std::thread::id this_id = std::this_thread::get_id(); 227 const auto this_id = GetHostThreadId();
221 if (!is_multicore && single_core_thread_id == this_id) { 228 if (!is_multicore && single_core_thread_id == this_id) {
222 return static_cast<u32>(system.GetCpuManager().CurrentCore()); 229 return static_cast<u32>(system.GetCpuManager().CurrentCore());
223 } 230 }
224 const auto end = 231 return this_id;
225 register_host_thread_keys.begin() + static_cast<ptrdiff_t>(num_host_threads);
226 const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
227 if (it == end) {
228 return Core::INVALID_HOST_THREAD_ID;
229 }
230 return register_host_thread_values[static_cast<size_t>(
231 std::distance(register_host_thread_keys.begin(), it))];
232 } 232 }
233 233
234 Core::EmuThreadHandle GetCurrentEmuThreadID() const { 234 [[nodiscard]] Core::EmuThreadHandle GetCurrentEmuThreadID() {
235 Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle(); 235 Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle();
236 result.host_handle = GetCurrentHostThreadID(); 236 result.host_handle = GetCurrentHostThreadID();
237 if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) { 237 if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) {
@@ -325,15 +325,8 @@ struct KernelCore::Impl {
325 std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor; 325 std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
326 std::vector<Kernel::PhysicalCore> cores; 326 std::vector<Kernel::PhysicalCore> cores;
327 327
 328 // 0-3 IDs represent core threads, >3 represent others 328 // Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
329 std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES}; 329 std::atomic<u32> next_host_thread_id{Core::Hardware::NUM_CPU_CORES};
330
331 // Number of host threads is a relatively high number to avoid overflowing
332 static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
333 std::atomic<size_t> num_host_threads{0};
334 std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS>
335 register_host_thread_keys{};
336 std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{};
337 330
338 // Kernel memory management 331 // Kernel memory management
339 std::unique_ptr<Memory::MemoryManager> memory_manager; 332 std::unique_ptr<Memory::MemoryManager> memory_manager;
@@ -345,12 +338,19 @@ struct KernelCore::Impl {
345 std::shared_ptr<Kernel::SharedMemory> irs_shared_mem; 338 std::shared_ptr<Kernel::SharedMemory> irs_shared_mem;
346 std::shared_ptr<Kernel::SharedMemory> time_shared_mem; 339 std::shared_ptr<Kernel::SharedMemory> time_shared_mem;
347 340
341 // Threads used for services
342 std::unordered_set<std::shared_ptr<Kernel::ServiceThread>> service_threads;
343
 344 // Service threads are managed by a worker thread, so that a calling service thread can queue
 345 // up its own release
346 std::unique_ptr<Common::ThreadWorker> service_thread_manager;
347
348 std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{}; 348 std::array<std::shared_ptr<Thread>, Core::Hardware::NUM_CPU_CORES> suspend_threads{};
349 std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{}; 349 std::array<Core::CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES> interrupts{};
350 std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{}; 350 std::array<std::unique_ptr<Kernel::KScheduler>, Core::Hardware::NUM_CPU_CORES> schedulers{};
351 351
352 bool is_multicore{}; 352 bool is_multicore{};
353 std::thread::id single_core_thread_id{}; 353 u32 single_core_thread_id{};
354 354
355 std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{}; 355 std::array<u64, Core::Hardware::NUM_CPU_CORES> svc_ticks{};
356 356
@@ -639,4 +639,19 @@ void KernelCore::ExitSVCProfile() {
639 MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]); 639 MicroProfileLeave(MICROPROFILE_TOKEN(Kernel_SVC), impl->svc_ticks[core]);
640} 640}
641 641
642std::weak_ptr<Kernel::ServiceThread> KernelCore::CreateServiceThread(const std::string& name) {
643 auto service_thread = std::make_shared<Kernel::ServiceThread>(*this, 1, name);
644 impl->service_thread_manager->QueueWork(
645 [this, service_thread] { impl->service_threads.emplace(service_thread); });
646 return service_thread;
647}
648
649void KernelCore::ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread) {
650 impl->service_thread_manager->QueueWork([this, service_thread] {
651 if (auto strong_ptr = service_thread.lock()) {
652 impl->service_threads.erase(strong_ptr);
653 }
654 });
655}
656
642} // namespace Kernel 657} // namespace Kernel
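The host-thread bookkeeping rewrite above leans on a thread_local: the initializer of GetHostThreadId's local runs exactly once per OS thread, so the old key/value lookup arrays collapse into a single atomic counter. Reduced to its essentials (a sketch of the idiom, not the yuzu code):

    #include <atomic>
    #include <cstdint>

    constexpr std::uint32_t kNumCpuCores = 4; // IDs 0-3 reserved for core threads
    std::atomic<std::uint32_t> next_host_thread_id{kNumCpuCores};

    std::uint32_t GetHostThreadId() {
        // Initialized once per calling thread, then cached for its lifetime.
        static thread_local const std::uint32_t host_thread_id = next_host_thread_id++;
        return host_thread_id;
    }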
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 5846c3f39..e3169f5a7 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -42,6 +42,7 @@ class Process;
42class ResourceLimit; 42class ResourceLimit;
43class KScheduler; 43class KScheduler;
44class SharedMemory; 44class SharedMemory;
45class ServiceThread;
45class Synchronization; 46class Synchronization;
46class Thread; 47class Thread;
47class TimeManager; 48class TimeManager;
@@ -227,6 +228,22 @@ public:
227 228
228 void ExitSVCProfile(); 229 void ExitSVCProfile();
229 230
231 /**
 232 * Creates an HLE service thread, which is used to execute service routines asynchronously.
 233 * While these are allocated per ServerSession, they need to be owned and managed outside of
 234 * ServerSession to avoid a circular dependency.
 235 * @param name String name for the ServerSession creating this thread, used for debug purposes.
 236 * @returns A weak pointer to the newly created service thread.
237 */
238 std::weak_ptr<Kernel::ServiceThread> CreateServiceThread(const std::string& name);
239
240 /**
 241 * Releases an HLE service thread, instructing KernelCore to free it. This should be called when
242 * the ServerSession associated with the thread is destroyed.
243 * @param service_thread Service thread to release.
244 */
245 void ReleaseServiceThread(std::weak_ptr<Kernel::ServiceThread> service_thread);
246
230private: 247private:
231 friend class Object; 248 friend class Object;
232 friend class Process; 249 friend class Process;
diff --git a/src/core/hle/kernel/memory/address_space_info.cpp b/src/core/hle/kernel/memory/address_space_info.cpp
index e4288cab4..6cf43ba24 100644
--- a/src/core/hle/kernel/memory/address_space_info.cpp
+++ b/src/core/hle/kernel/memory/address_space_info.cpp
@@ -96,6 +96,7 @@ u64 AddressSpaceInfo::GetAddressSpaceStart(std::size_t width, Type type) {
96 return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].address; 96 return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].address;
97 } 97 }
98 UNREACHABLE(); 98 UNREACHABLE();
99 return 0;
99} 100}
100 101
101std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) { 102std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type) {
@@ -112,6 +113,7 @@ std::size_t AddressSpaceInfo::GetAddressSpaceSize(std::size_t width, Type type)
112 return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].size; 113 return AddressSpaceInfos[AddressSpaceIndices39Bit[index]].size;
113 } 114 }
114 UNREACHABLE(); 115 UNREACHABLE();
116 return 0;
115} 117}
116 118
117} // namespace Kernel::Memory 119} // namespace Kernel::Memory
diff --git a/src/core/hle/kernel/memory/memory_block.h b/src/core/hle/kernel/memory/memory_block.h
index 37fe19916..83acece1e 100644
--- a/src/core/hle/kernel/memory/memory_block.h
+++ b/src/core/hle/kernel/memory/memory_block.h
@@ -73,12 +73,12 @@ enum class MemoryState : u32 {
73 ThreadLocal = 73 ThreadLocal =
74 static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted, 74 static_cast<u32>(Svc::MemoryState::ThreadLocal) | FlagMapped | FlagReferenceCounted,
75 75
76 Transfered = static_cast<u32>(Svc::MemoryState::Transfered) | FlagsMisc | 76 Transferred = static_cast<u32>(Svc::MemoryState::Transferred) | FlagsMisc |
77 FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc | 77 FlagCanAlignedDeviceMap | FlagCanChangeAttribute | FlagCanUseIpc |
78 FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, 78 FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
79 79
80 SharedTransfered = static_cast<u32>(Svc::MemoryState::SharedTransfered) | FlagsMisc | 80 SharedTransferred = static_cast<u32>(Svc::MemoryState::SharedTransferred) | FlagsMisc |
81 FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, 81 FlagCanAlignedDeviceMap | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
82 82
83 SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped | 83 SharedCode = static_cast<u32>(Svc::MemoryState::SharedCode) | FlagMapped |
84 FlagReferenceCounted | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc, 84 FlagReferenceCounted | FlagCanUseNonSecureIpc | FlagCanUseNonDeviceIpc,
@@ -111,8 +111,8 @@ static_assert(static_cast<u32>(MemoryState::AliasCodeData) == 0x03FFBD09);
111static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A); 111static_assert(static_cast<u32>(MemoryState::Ipc) == 0x005C3C0A);
112static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B); 112static_assert(static_cast<u32>(MemoryState::Stack) == 0x005C3C0B);
113static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C); 113static_assert(static_cast<u32>(MemoryState::ThreadLocal) == 0x0040200C);
114static_assert(static_cast<u32>(MemoryState::Transfered) == 0x015C3C0D); 114static_assert(static_cast<u32>(MemoryState::Transferred) == 0x015C3C0D);
115static_assert(static_cast<u32>(MemoryState::SharedTransfered) == 0x005C380E); 115static_assert(static_cast<u32>(MemoryState::SharedTransferred) == 0x005C380E);
116static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F); 116static_assert(static_cast<u32>(MemoryState::SharedCode) == 0x0040380F);
117static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010); 117static_assert(static_cast<u32>(MemoryState::Inaccessible) == 0x00000010);
118static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811); 118static_assert(static_cast<u32>(MemoryState::NonSecureIpc) == 0x005C3811);
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp
index f53a7be82..080886554 100644
--- a/src/core/hle/kernel/memory/page_table.cpp
+++ b/src/core/hle/kernel/memory/page_table.cpp
@@ -265,7 +265,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t
265 physical_memory_usage = 0; 265 physical_memory_usage = 0;
266 memory_pool = pool; 266 memory_pool = pool;
267 267
268 page_table_impl.Resize(address_space_width, PageBits, true); 268 page_table_impl.Resize(address_space_width, PageBits);
269 269
270 return InitializeMemoryLayout(start, end); 270 return InitializeMemoryLayout(start, end);
271} 271}
@@ -1007,8 +1007,8 @@ constexpr VAddr PageTable::GetRegionAddress(MemoryState state) const {
1007 case MemoryState::Shared: 1007 case MemoryState::Shared:
1008 case MemoryState::AliasCode: 1008 case MemoryState::AliasCode:
1009 case MemoryState::AliasCodeData: 1009 case MemoryState::AliasCodeData:
1010 case MemoryState::Transfered: 1010 case MemoryState::Transferred:
1011 case MemoryState::SharedTransfered: 1011 case MemoryState::SharedTransferred:
1012 case MemoryState::SharedCode: 1012 case MemoryState::SharedCode:
1013 case MemoryState::GeneratedCode: 1013 case MemoryState::GeneratedCode:
1014 case MemoryState::CodeOut: 1014 case MemoryState::CodeOut:
@@ -1042,8 +1042,8 @@ constexpr std::size_t PageTable::GetRegionSize(MemoryState state) const {
1042 case MemoryState::Shared: 1042 case MemoryState::Shared:
1043 case MemoryState::AliasCode: 1043 case MemoryState::AliasCode:
1044 case MemoryState::AliasCodeData: 1044 case MemoryState::AliasCodeData:
1045 case MemoryState::Transfered: 1045 case MemoryState::Transferred:
1046 case MemoryState::SharedTransfered: 1046 case MemoryState::SharedTransferred:
1047 case MemoryState::SharedCode: 1047 case MemoryState::SharedCode:
1048 case MemoryState::GeneratedCode: 1048 case MemoryState::GeneratedCode:
1049 case MemoryState::CodeOut: 1049 case MemoryState::CodeOut:
@@ -1080,8 +1080,8 @@ constexpr bool PageTable::CanContain(VAddr addr, std::size_t size, MemoryState s
1080 case MemoryState::AliasCodeData: 1080 case MemoryState::AliasCodeData:
1081 case MemoryState::Stack: 1081 case MemoryState::Stack:
1082 case MemoryState::ThreadLocal: 1082 case MemoryState::ThreadLocal:
1083 case MemoryState::Transfered: 1083 case MemoryState::Transferred:
1084 case MemoryState::SharedTransfered: 1084 case MemoryState::SharedTransferred:
1085 case MemoryState::SharedCode: 1085 case MemoryState::SharedCode:
1086 case MemoryState::GeneratedCode: 1086 case MemoryState::GeneratedCode:
1087 case MemoryState::CodeOut: 1087 case MemoryState::CodeOut:
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp
index a35c8aa4b..b40fe3916 100644
--- a/src/core/hle/kernel/server_session.cpp
+++ b/src/core/hle/kernel/server_session.cpp
@@ -25,19 +25,19 @@
25namespace Kernel { 25namespace Kernel {
26 26
27ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {} 27ServerSession::ServerSession(KernelCore& kernel) : SynchronizationObject{kernel} {}
28ServerSession::~ServerSession() = default; 28
29ServerSession::~ServerSession() {
30 kernel.ReleaseServiceThread(service_thread);
31}
29 32
30ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel, 33ResultVal<std::shared_ptr<ServerSession>> ServerSession::Create(KernelCore& kernel,
31 std::shared_ptr<Session> parent, 34 std::shared_ptr<Session> parent,
32 std::string name) { 35 std::string name) {
33 std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)}; 36 std::shared_ptr<ServerSession> session{std::make_shared<ServerSession>(kernel)};
34 37
35 session->request_event =
36 Core::Timing::CreateEvent(name, [session](std::uintptr_t, std::chrono::nanoseconds) {
37 session->CompleteSyncRequest();
38 });
39 session->name = std::move(name); 38 session->name = std::move(name);
40 session->parent = std::move(parent); 39 session->parent = std::move(parent);
40 session->service_thread = kernel.CreateServiceThread(session->name);
41 41
42 return MakeResult(std::move(session)); 42 return MakeResult(std::move(session));
43} 43}
@@ -142,16 +142,16 @@ ResultCode ServerSession::QueueSyncRequest(std::shared_ptr<Thread> thread,
142 std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread)); 142 std::make_shared<HLERequestContext>(kernel, memory, SharedFrom(this), std::move(thread));
143 143
144 context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); 144 context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
145 request_queue.Push(std::move(context)); 145
146 if (auto strong_ptr = service_thread.lock()) {
147 strong_ptr->QueueSyncRequest(*this, std::move(context));
148 return RESULT_SUCCESS;
149 }
146 150
147 return RESULT_SUCCESS; 151 return RESULT_SUCCESS;
148} 152}
149 153
150ResultCode ServerSession::CompleteSyncRequest() { 154ResultCode ServerSession::CompleteSyncRequest(HLERequestContext& context) {
151 ASSERT(!request_queue.Empty());
152
153 auto& context = *request_queue.Front();
154
155 ResultCode result = RESULT_SUCCESS; 155 ResultCode result = RESULT_SUCCESS;
156 // If the session has been converted to a domain, handle the domain request 156 // If the session has been converted to a domain, handle the domain request
157 if (IsDomain() && context.HasDomainMessageHeader()) { 157 if (IsDomain() && context.HasDomainMessageHeader()) {
@@ -177,18 +177,13 @@ ResultCode ServerSession::CompleteSyncRequest() {
177 } 177 }
178 } 178 }
179 179
180 request_queue.Pop();
181
182 return result; 180 return result;
183} 181}
184 182
185ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread, 183ResultCode ServerSession::HandleSyncRequest(std::shared_ptr<Thread> thread,
186 Core::Memory::Memory& memory, 184 Core::Memory::Memory& memory,
187 Core::Timing::CoreTiming& core_timing) { 185 Core::Timing::CoreTiming& core_timing) {
188 const ResultCode result = QueueSyncRequest(std::move(thread), memory); 186 return QueueSyncRequest(std::move(thread), memory);
189 const auto delay = std::chrono::nanoseconds{kernel.IsMulticore() ? 0 : 20000};
190 core_timing.ScheduleEvent(delay, request_event, {});
191 return result;
192} 187}
193 188
194} // namespace Kernel 189} // namespace Kernel
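The ownership split here is worth spelling out: KernelCore's unordered_set holds the only strong references to each ServiceThread, while the ServerSession keeps a weak_ptr, so destroying a session cannot form a shared_ptr cycle and a request is only dispatched if the thread is still alive. In miniature (sketch with placeholder types):

    #include <memory>
    #include <unordered_set>

    struct ServiceThread {};

    // Kernel side: the single strong owner of every service thread.
    std::unordered_set<std::shared_ptr<ServiceThread>> service_threads;

    std::weak_ptr<ServiceThread> CreateServiceThread() {
        auto thread = std::make_shared<ServiceThread>();
        service_threads.insert(thread);
        return thread; // sessions only ever see a weak reference
    }

    void QueueOnServiceThread(const std::weak_ptr<ServiceThread>& weak) {
        if (auto strong = weak.lock()) {
            // Still owned by the kernel; safe to queue work on it here.
        }
    }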
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index d23e9ec68..e8d1d99ea 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -10,6 +10,7 @@
10#include <vector> 10#include <vector>
11 11
12#include "common/threadsafe_queue.h" 12#include "common/threadsafe_queue.h"
13#include "core/hle/kernel/service_thread.h"
13#include "core/hle/kernel/synchronization_object.h" 14#include "core/hle/kernel/synchronization_object.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15 16
@@ -43,6 +44,8 @@ class Thread;
43 * TLS buffer and control is transferred back to it. 44 * TLS buffer and control is transferred back to it.
44 */ 45 */
45class ServerSession final : public SynchronizationObject { 46class ServerSession final : public SynchronizationObject {
47 friend class ServiceThread;
48
46public: 49public:
47 explicit ServerSession(KernelCore& kernel); 50 explicit ServerSession(KernelCore& kernel);
48 ~ServerSession() override; 51 ~ServerSession() override;
@@ -132,7 +135,7 @@ private:
132 ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory); 135 ResultCode QueueSyncRequest(std::shared_ptr<Thread> thread, Core::Memory::Memory& memory);
133 136
134 /// Completes a sync request from the emulated application. 137 /// Completes a sync request from the emulated application.
135 ResultCode CompleteSyncRequest(); 138 ResultCode CompleteSyncRequest(HLERequestContext& context);
136 139
137 /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an 140 /// Handles a SyncRequest to a domain, forwarding the request to the proper object or closing an
138 /// object handle. 141 /// object handle.
@@ -163,11 +166,8 @@ private:
163 /// The name of this session (optional) 166 /// The name of this session (optional)
164 std::string name; 167 std::string name;
165 168
166 /// Core timing event used to schedule the service request at some point in the future 169 /// Thread to dispatch service requests
167 std::shared_ptr<Core::Timing::EventType> request_event; 170 std::weak_ptr<ServiceThread> service_thread;
168
169 /// Queue of scheduled service requests
170 Common::MPSCQueue<std::shared_ptr<Kernel::HLERequestContext>> request_queue;
171}; 171};
172 172
173} // namespace Kernel 173} // namespace Kernel
diff --git a/src/core/hle/kernel/service_thread.cpp b/src/core/hle/kernel/service_thread.cpp
new file mode 100644
index 000000000..ee46f3e21
--- /dev/null
+++ b/src/core/hle/kernel/service_thread.cpp
@@ -0,0 +1,110 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <condition_variable>
6#include <functional>
7#include <mutex>
8#include <thread>
 9#include <queue>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/scope_exit.h"
14#include "common/thread.h"
15#include "core/core.h"
16#include "core/hle/kernel/kernel.h"
17#include "core/hle/kernel/server_session.h"
18#include "core/hle/kernel/service_thread.h"
19#include "core/hle/lock.h"
20#include "video_core/renderer_base.h"
21
22namespace Kernel {
23
24class ServiceThread::Impl final {
25public:
26 explicit Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name);
27 ~Impl();
28
29 void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
30
31private:
32 std::vector<std::thread> threads;
33 std::queue<std::function<void()>> requests;
34 std::mutex queue_mutex;
35 std::condition_variable condition;
36 const std::string service_name;
37 bool stop{};
38};
39
40ServiceThread::Impl::Impl(KernelCore& kernel, std::size_t num_threads, const std::string& name)
41 : service_name{name} {
42 for (std::size_t i = 0; i < num_threads; ++i)
43 threads.emplace_back([this, &kernel] {
44 Common::SetCurrentThreadName(std::string{"yuzu:HleService:" + service_name}.c_str());
45
46 // Wait for first request before trying to acquire a render context
47 {
48 std::unique_lock lock{queue_mutex};
49 condition.wait(lock, [this] { return stop || !requests.empty(); });
50 }
51
52 kernel.RegisterHostThread();
53
54 while (true) {
55 std::function<void()> task;
56
57 {
58 std::unique_lock lock{queue_mutex};
59 condition.wait(lock, [this] { return stop || !requests.empty(); });
60 if (stop || requests.empty()) {
61 return;
62 }
63 task = std::move(requests.front());
64 requests.pop();
65 }
66
67 task();
68 }
69 });
70}
71
72void ServiceThread::Impl::QueueSyncRequest(ServerSession& session,
73 std::shared_ptr<HLERequestContext>&& context) {
74 {
75 std::unique_lock lock{queue_mutex};
76
 77 // ServerSession owns the service thread, so we cannot capture a strong pointer here in the
78 // event that the ServerSession is terminated.
79 std::weak_ptr<ServerSession> weak_ptr{SharedFrom(&session)};
80 requests.emplace([weak_ptr, context{std::move(context)}]() {
81 if (auto strong_ptr = weak_ptr.lock()) {
82 strong_ptr->CompleteSyncRequest(*context);
83 }
84 });
85 }
86 condition.notify_one();
87}
88
89ServiceThread::Impl::~Impl() {
90 {
91 std::unique_lock lock{queue_mutex};
92 stop = true;
93 }
94 condition.notify_all();
95 for (std::thread& thread : threads) {
96 thread.join();
97 }
98}
99
100ServiceThread::ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name)
101 : impl{std::make_unique<Impl>(kernel, num_threads, name)} {}
102
103ServiceThread::~ServiceThread() = default;
104
105void ServiceThread::QueueSyncRequest(ServerSession& session,
106 std::shared_ptr<HLERequestContext>&& context) {
107 impl->QueueSyncRequest(session, std::move(context));
108}
109
110} // namespace Kernel
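One detail of the worker loop above deserves a note: shutdown sets stop under queue_mutex, wakes every worker with notify_all, and joins, and because each worker re-checks "stop || requests.empty()" after waking, no thread can sleep through the signal; any requests still queued at that point are deliberately dropped. The same handshake in isolation (generic sketch, not yuzu code):

    #include <condition_variable>
    #include <functional>
    #include <mutex>
    #include <queue>
    #include <thread>

    std::mutex queue_mutex;
    std::condition_variable condition;
    std::queue<std::function<void()>> requests;
    bool stop = false;

    void WorkerLoop() {
        while (true) {
            std::function<void()> task;
            {
                std::unique_lock lock{queue_mutex};
                condition.wait(lock, [] { return stop || !requests.empty(); });
                if (stop) {
                    return; // pending requests are intentionally abandoned
                }
                task = std::move(requests.front());
                requests.pop();
            }
            task();
        }
    }

    void Shutdown(std::thread& worker) {
        {
            std::unique_lock lock{queue_mutex};
            stop = true; // set under the mutex so no wait can miss it
        }
        condition.notify_all();
        worker.join();
    }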
diff --git a/src/core/hle/kernel/service_thread.h b/src/core/hle/kernel/service_thread.h
new file mode 100644
index 000000000..025ab8fb5
--- /dev/null
+++ b/src/core/hle/kernel/service_thread.h
@@ -0,0 +1,28 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <string>
9
10namespace Kernel {
11
12class HLERequestContext;
13class KernelCore;
14class ServerSession;
15
16class ServiceThread final {
17public:
18 explicit ServiceThread(KernelCore& kernel, std::size_t num_threads, const std::string& name);
19 ~ServiceThread();
20
21 void QueueSyncRequest(ServerSession& session, std::shared_ptr<HLERequestContext>&& context);
22
23private:
24 class Impl;
25 std::unique_ptr<Impl> impl;
26};
27
28} // namespace Kernel
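This header stays small because of the pimpl: ServiceThread only forward-declares Impl, so <thread>, <mutex>, and <queue> never leak to includers, and the destructor must be defined out of line in the .cpp where Impl is complete, since std::unique_ptr<Impl> cannot delete an incomplete type. The shape of the idiom (generic sketch, both halves shown in one block):

    #include <memory>

    // widget.h
    class Widget {
    public:
        Widget();
        ~Widget(); // declaration only; defined where Impl is complete

    private:
        class Impl;
        std::unique_ptr<Impl> impl;
    };

    // widget.cpp
    class Widget::Impl { /* heavy members live here */ };
    Widget::Widget() : impl{std::make_unique<Impl>()} {}
    Widget::~Widget() = default; // unique_ptr<Impl> can now instantiate its deleter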
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 2d225392f..de3ed25da 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1583,7 +1583,7 @@ static void ExitThread32(Core::System& system) {
1583 1583
1584/// Sleep the current thread 1584/// Sleep the current thread
1585static void SleepThread(Core::System& system, s64 nanoseconds) { 1585static void SleepThread(Core::System& system, s64 nanoseconds) {
1586 LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds); 1586 LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
1587 1587
1588 enum class SleepType : s64 { 1588 enum class SleepType : s64 {
1589 YieldWithoutCoreMigration = 0, 1589 YieldWithoutCoreMigration = 0,
diff --git a/src/core/hle/kernel/svc_types.h b/src/core/hle/kernel/svc_types.h
index 986724beb..11e1d8e2d 100644
--- a/src/core/hle/kernel/svc_types.h
+++ b/src/core/hle/kernel/svc_types.h
@@ -23,8 +23,8 @@ enum class MemoryState : u32 {
23 Ipc = 0x0A, 23 Ipc = 0x0A,
24 Stack = 0x0B, 24 Stack = 0x0B,
25 ThreadLocal = 0x0C, 25 ThreadLocal = 0x0C,
26 Transfered = 0x0D, 26 Transferred = 0x0D,
27 SharedTransfered = 0x0E, 27 SharedTransferred = 0x0E,
28 SharedCode = 0x0F, 28 SharedCode = 0x0F,
29 Inaccessible = 0x10, 29 Inaccessible = 0x10,
30 NonSecureIpc = 0x11, 30 NonSecureIpc = 0x11,
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index cb13210e5..c9808060a 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -560,14 +560,14 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest
560 560
561AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) { 561AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) {
562 on_new_message = 562 on_new_message =
563 Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageRecieved"); 563 Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OnMessageReceived");
564 on_operation_mode_changed = 564 on_operation_mode_changed =
565 Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged"); 565 Kernel::WritableEvent::CreateEventPair(kernel, "AMMessageQueue:OperationModeChanged");
566} 566}
567 567
568AppletMessageQueue::~AppletMessageQueue() = default; 568AppletMessageQueue::~AppletMessageQueue() = default;
569 569
570const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMesssageRecieveEvent() const { 570const std::shared_ptr<Kernel::ReadableEvent>& AppletMessageQueue::GetMessageReceiveEvent() const {
571 return on_new_message.readable; 571 return on_new_message.readable;
572} 572}
573 573
@@ -675,7 +675,7 @@ void ICommonStateGetter::GetEventHandle(Kernel::HLERequestContext& ctx) {
675 675
676 IPC::ResponseBuilder rb{ctx, 2, 1}; 676 IPC::ResponseBuilder rb{ctx, 2, 1};
677 rb.Push(RESULT_SUCCESS); 677 rb.Push(RESULT_SUCCESS);
678 rb.PushCopyObjects(msg_queue->GetMesssageRecieveEvent()); 678 rb.PushCopyObjects(msg_queue->GetMessageReceiveEvent());
679} 679}
680 680
681void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) { 681void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index b1da0d081..f51aca1af 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -55,7 +55,7 @@ public:
55 explicit AppletMessageQueue(Kernel::KernelCore& kernel); 55 explicit AppletMessageQueue(Kernel::KernelCore& kernel);
56 ~AppletMessageQueue(); 56 ~AppletMessageQueue();
57 57
58 const std::shared_ptr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const; 58 const std::shared_ptr<Kernel::ReadableEvent>& GetMessageReceiveEvent() const;
59 const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const; 59 const std::shared_ptr<Kernel::ReadableEvent>& GetOperationModeChangedEvent() const;
60 void PushMessage(AppletMessage msg); 60 void PushMessage(AppletMessage msg);
61 AppletMessage PopMessage(); 61 AppletMessage PopMessage();
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp
index 298f6d520..0bff97a37 100644
--- a/src/core/hle/service/apm/interface.cpp
+++ b/src/core/hle/service/apm/interface.cpp
@@ -56,7 +56,7 @@ APM::APM(Core::System& system_, std::shared_ptr<Module> apm_, Controller& contro
56 static const FunctionInfo functions[] = { 56 static const FunctionInfo functions[] = {
57 {0, &APM::OpenSession, "OpenSession"}, 57 {0, &APM::OpenSession, "OpenSession"},
58 {1, &APM::GetPerformanceMode, "GetPerformanceMode"}, 58 {1, &APM::GetPerformanceMode, "GetPerformanceMode"},
59 {6, nullptr, "IsCpuOverclockEnabled"}, 59 {6, &APM::IsCpuOverclockEnabled, "IsCpuOverclockEnabled"},
60 }; 60 };
61 RegisterHandlers(functions); 61 RegisterHandlers(functions);
62} 62}
@@ -78,6 +78,14 @@ void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
78 rb.PushEnum(controller.GetCurrentPerformanceMode()); 78 rb.PushEnum(controller.GetCurrentPerformanceMode());
79} 79}
80 80
81void APM::IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx) {
82 LOG_WARNING(Service_APM, "(STUBBED) called");
83
84 IPC::ResponseBuilder rb{ctx, 3};
85 rb.Push(RESULT_SUCCESS);
86 rb.Push(false);
87}
88
81APM_Sys::APM_Sys(Core::System& system_, Controller& controller_) 89APM_Sys::APM_Sys(Core::System& system_, Controller& controller_)
82 : ServiceFramework{system_, "apm:sys"}, controller{controller_} { 90 : ServiceFramework{system_, "apm:sys"}, controller{controller_} {
83 // clang-format off 91 // clang-format off
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h
index 7d57c4978..063ad5308 100644
--- a/src/core/hle/service/apm/interface.h
+++ b/src/core/hle/service/apm/interface.h
@@ -20,6 +20,7 @@ public:
20private: 20private:
21 void OpenSession(Kernel::HLERequestContext& ctx); 21 void OpenSession(Kernel::HLERequestContext& ctx);
22 void GetPerformanceMode(Kernel::HLERequestContext& ctx); 22 void GetPerformanceMode(Kernel::HLERequestContext& ctx);
23 void IsCpuOverclockEnabled(Kernel::HLERequestContext& ctx);
23 24
24 std::shared_ptr<Module> apm; 25 std::shared_ptr<Module> apm;
25 Controller& controller; 26 Controller& controller;
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 145f47ee2..0cd797109 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -70,8 +70,10 @@ public:
70 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased"); 70 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioOutBufferReleased");
71 71
72 stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate, 72 stream = audio_core.OpenStream(system.CoreTiming(), audio_params.sample_rate,
73 audio_params.channel_count, std::move(unique_name), 73 audio_params.channel_count, std::move(unique_name), [this] {
74 [this] { buffer_event.writable->Signal(); }); 74 const auto guard = LockService();
75 buffer_event.writable->Signal();
76 });
75 } 77 }
76 78
77private: 79private:
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 6e7b7316c..c5c22d053 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -49,16 +49,16 @@ public:
49 49
50 system_event = 50 system_event =
51 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent"); 51 Kernel::WritableEvent::CreateEventPair(system.Kernel(), "IAudioRenderer:SystemEvent");
52 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), system.Memory(), 52 renderer = std::make_unique<AudioCore::AudioRenderer>(
53 audren_params, system_event.writable, 53 system.CoreTiming(), system.Memory(), audren_params,
54 instance_number); 54 [this]() {
55 const auto guard = LockService();
56 system_event.writable->Signal();
57 },
58 instance_number);
55 } 59 }
56 60
57private: 61private:
58 void UpdateAudioCallback() {
59 system_event.writable->Signal();
60 }
61
62 void GetSampleRate(Kernel::HLERequestContext& ctx) { 62 void GetSampleRate(Kernel::HLERequestContext& ctx) {
63 LOG_DEBUG(Service_Audio, "called"); 63 LOG_DEBUG(Service_Audio, "called");
64 64
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index b3c7234e1..8d95f74e6 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -78,11 +78,13 @@ IAppletResource::IAppletResource(Core::System& system_)
78 pad_update_event = Core::Timing::CreateEvent( 78 pad_update_event = Core::Timing::CreateEvent(
79 "HID::UpdatePadCallback", 79 "HID::UpdatePadCallback",
80 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { 80 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
81 const auto guard = LockService();
81 UpdateControllers(user_data, ns_late); 82 UpdateControllers(user_data, ns_late);
82 }); 83 });
83 motion_update_event = Core::Timing::CreateEvent( 84 motion_update_event = Core::Timing::CreateEvent(
84 "HID::MotionPadCallback", 85 "HID::MotionPadCallback",
85 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) { 86 [this](std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
87 const auto guard = LockService();
86 UpdateMotion(user_data, ns_late); 88 UpdateMotion(user_data, ns_late);
87 }); 89 });
88 90
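All three callback changes above (audout, audren, HID) follow one pattern: these callbacks fire on CoreTiming or audio threads, so each now takes the HLE service lock before touching service state. Assuming LockService() returns a scoped RAII lock over the global HLE mutex (an assumption about its exact signature), the shape is:

    #include <mutex>

    std::mutex hle_lock; // stand-in for the global HLE service lock

    [[nodiscard]] std::scoped_lock<std::mutex> LockService() {
        return std::scoped_lock{hle_lock}; // legal via C++17 guaranteed elision
    }

    void UpdatePadCallback() {
        const auto guard = LockService(); // held for the whole update
        // ... poll controllers and publish shared-memory state ...
    }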
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 44a8bc060..5681599ba 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -31,8 +31,8 @@ public:
31 * @param output A buffer where the output data will be written to. 31 * @param output A buffer where the output data will be written to.
32 * @returns The result code of the ioctl. 32 * @returns The result code of the ioctl.
33 */ 33 */
34 virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 34 virtual NvResult Ioctl1(Ioctl command, const std::vector<u8>& input,
35 IoctlCtrl& ctrl) = 0; 35 std::vector<u8>& output) = 0;
36 36
37 /** 37 /**
38 * Handles an ioctl2 request. 38 * Handles an ioctl2 request.
@@ -43,8 +43,7 @@ public:
43 * @returns The result code of the ioctl. 43 * @returns The result code of the ioctl.
44 */ 44 */
45 virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 45 virtual NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
46 const std::vector<u8>& inline_input, std::vector<u8>& output, 46 const std::vector<u8>& inline_input, std::vector<u8>& output) = 0;
47 IoctlCtrl& ctrl) = 0;
48 47
49 /** 48 /**
50 * Handles an ioctl3 request. 49 * Handles an ioctl3 request.
@@ -55,7 +54,7 @@ public:
55 * @returns The result code of the ioctl. 54 * @returns The result code of the ioctl.
56 */ 55 */
57 virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 56 virtual NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
58 std::vector<u8>& inline_output, IoctlCtrl& ctrl) = 0; 57 std::vector<u8>& inline_output) = 0;
59 58
60protected: 59protected:
61 Core::System& system; 60 Core::System& system;
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 170a7c9a0..ce615c758 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -18,21 +18,20 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_de
18 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} 18 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
19nvdisp_disp0::~nvdisp_disp0() = default; 19nvdisp_disp0::~nvdisp_disp0() = default;
20 20
21NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 21NvResult nvdisp_disp0::Ioctl1(Ioctl command, const std::vector<u8>& input,
22 IoctlCtrl& ctrl) { 22 std::vector<u8>& output) {
23 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 23 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
24 return NvResult::NotImplemented; 24 return NvResult::NotImplemented;
25} 25}
26 26
27NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input, 27NvResult nvdisp_disp0::Ioctl2(Ioctl command, const std::vector<u8>& input,
28 const std::vector<u8>& inline_input, std::vector<u8>& output, 28 const std::vector<u8>& inline_input, std::vector<u8>& output) {
29 IoctlCtrl& ctrl) {
30 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 29 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
31 return NvResult::NotImplemented; 30 return NvResult::NotImplemented;
32} 31}
33 32
34NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 33NvResult nvdisp_disp0::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
35 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 34 std::vector<u8>& inline_output) {
36 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 35 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
37 return NvResult::NotImplemented; 36 return NvResult::NotImplemented;
38} 37}
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index eb7575e40..55a33b7e4 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -20,13 +20,11 @@ public:
20 explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvdisp_disp0() override; 21 ~nvdisp_disp0() override;
22 22
23 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 23 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
24 IoctlCtrl& ctrl) override;
25 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 24 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
26 const std::vector<u8>& inline_input, std::vector<u8>& output, 25 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
27 IoctlCtrl& ctrl) override;
28 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 26 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
29 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 27 std::vector<u8>& inline_output) override;
30 28
31 /// Performs a screen flip, drawing the buffer pointed to by the handle. 29 /// Performs a screen flip, drawing the buffer pointed to by the handle.
32 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 30 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 4e0652c39..6b062e10e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -21,8 +21,8 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_
21 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} 21 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
22nvhost_as_gpu::~nvhost_as_gpu() = default; 22nvhost_as_gpu::~nvhost_as_gpu() = default;
23 23
24NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 24NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
25 IoctlCtrl& ctrl) { 25 std::vector<u8>& output) {
26 switch (command.group) { 26 switch (command.group) {
27 case 'A': 27 case 'A':
28 switch (command.cmd) { 28 switch (command.cmd) {
@@ -55,14 +55,13 @@ NvResult nvhost_as_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std:
55} 55}
56 56
57NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, 57NvResult nvhost_as_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
58 const std::vector<u8>& inline_input, std::vector<u8>& output, 58 const std::vector<u8>& inline_input, std::vector<u8>& output) {
59 IoctlCtrl& ctrl) {
60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
61 return NvResult::NotImplemented; 60 return NvResult::NotImplemented;
62} 61}
63 62
64NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 63NvResult nvhost_as_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
65 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 64 std::vector<u8>& inline_output) {
66 switch (command.group) { 65 switch (command.group) {
67 case 'A': 66 case 'A':
68 switch (command.cmd) { 67 switch (command.cmd) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 2bd355af9..08035fa0e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -30,13 +30,11 @@ public:
30 explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 30 explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
31 ~nvhost_as_gpu() override; 31 ~nvhost_as_gpu() override;
32 32
33 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 33 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
34 IoctlCtrl& ctrl) override;
35 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 34 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
36 const std::vector<u8>& inline_input, std::vector<u8>& output, 35 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
37 IoctlCtrl& ctrl) override;
38 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 36 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
39 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 37 std::vector<u8>& inline_output) override;
40 38
41private: 39private:
42 class BufferMap final { 40 class BufferMap final {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 92d31b620..fea3b7b9f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -20,8 +20,7 @@ nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface,
20 : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} 20 : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {}
21nvhost_ctrl::~nvhost_ctrl() = default; 21nvhost_ctrl::~nvhost_ctrl() = default;
22 22
23NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 23NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
24 IoctlCtrl& ctrl) {
25 switch (command.group) { 24 switch (command.group) {
26 case 0x0: 25 case 0x0:
27 switch (command.cmd) { 26 switch (command.cmd) {
@@ -30,9 +29,9 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v
30 case 0x1c: 29 case 0x1c:
31 return IocCtrlClearEventWait(input, output); 30 return IocCtrlClearEventWait(input, output);
32 case 0x1d: 31 case 0x1d:
33 return IocCtrlEventWait(input, output, false, ctrl); 32 return IocCtrlEventWait(input, output, false);
34 case 0x1e: 33 case 0x1e:
35 return IocCtrlEventWait(input, output, true, ctrl); 34 return IocCtrlEventWait(input, output, true);
36 case 0x1f: 35 case 0x1f:
37 return IocCtrlEventRegister(input, output); 36 return IocCtrlEventRegister(input, output);
38 case 0x20: 37 case 0x20:
@@ -48,14 +47,13 @@ NvResult nvhost_ctrl::Ioctl1(Ioctl command, const std::vector<u8>& input, std::v
48} 47}
49 48
50NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input, 49NvResult nvhost_ctrl::Ioctl2(Ioctl command, const std::vector<u8>& input,
51 const std::vector<u8>& inline_input, std::vector<u8>& output, 50 const std::vector<u8>& inline_input, std::vector<u8>& output) {
52 IoctlCtrl& ctrl) {
53 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 51 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
54 return NvResult::NotImplemented; 52 return NvResult::NotImplemented;
55} 53}
56 54
57NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 55NvResult nvhost_ctrl::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
58 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 56 std::vector<u8>& inline_output) {
59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 57 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
60 return NvResult::NotImplemented; 58 return NvResult::NotImplemented;
61} 59}
@@ -69,7 +67,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector
69} 67}
70 68
71NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, 69NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
72 bool is_async, IoctlCtrl& ctrl) { 70 bool is_async) {
73 IocCtrlEventWaitParams params{}; 71 IocCtrlEventWaitParams params{};
74 std::memcpy(&params, input.data(), sizeof(params)); 72 std::memcpy(&params, input.data(), sizeof(params));
75 LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}", 73 LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
@@ -141,12 +139,6 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
141 params.value |= event_id; 139 params.value |= event_id;
142 event.event.writable->Clear(); 140 event.event.writable->Clear();
143 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); 141 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
144 if (!is_async && ctrl.fresh_call) {
145 ctrl.must_delay = true;
146 ctrl.timeout = params.timeout;
147 ctrl.event_id = event_id;
148 return NvResult::Timeout;
149 }
150 std::memcpy(output.data(), &params, sizeof(params)); 142 std::memcpy(output.data(), &params, sizeof(params));
151 return NvResult::Timeout; 143 return NvResult::Timeout;
152 } 144 }
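With IoctlCtrl gone, IocCtrlEventWait can no longer ask the IPC layer to park the request and retry it later: it either completes immediately or arms a syncpoint interrupt and returns Timeout, leaving the guest to wait on the NV event. A stand-alone sketch of that flow, with hypothetical helpers standing in for yuzu's SyncpointManager and GPU calls:

#include <cstdint>

enum class NvResult : std::uint32_t { Success = 0, Timeout = 5 };

// Stand-in for the SyncpointManager threshold query the real code performs.
bool SyncpointReached(std::uint32_t syncpt_id, std::uint32_t threshold) {
    return false; // pretend the GPU has not reached the threshold yet
}

// Stand-in for gpu.RegisterSyncptInterrupt(id, target_value).
void RegisterSyncptInterrupt(std::uint32_t syncpt_id, std::uint32_t target) {}

// Either succeed now or arm an interrupt and report Timeout; there is no
// longer a path that parks the IPC request and re-runs the ioctl later.
NvResult EventWaitSketch(std::uint32_t syncpt_id, std::uint32_t threshold) {
    if (SyncpointReached(syncpt_id, threshold)) {
        return NvResult::Success;
    }
    RegisterSyncptInterrupt(syncpt_id, threshold);
    return NvResult::Timeout;
}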
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 107168e21..c5aa1362a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -18,13 +18,11 @@ public:
18 SyncpointManager& syncpoint_manager); 18 SyncpointManager& syncpoint_manager);
19 ~nvhost_ctrl() override; 19 ~nvhost_ctrl() override;
20 20
21 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 21 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
22 IoctlCtrl& ctrl) override;
23 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 22 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
24 const std::vector<u8>& inline_input, std::vector<u8>& output, 23 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
25 IoctlCtrl& ctrl) override;
26 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 24 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
27 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 25 std::vector<u8>& inline_output) override;
28 26
29private: 27private:
30 struct IocSyncptReadParams { 28 struct IocSyncptReadParams {
@@ -123,8 +121,7 @@ private:
123 static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size"); 121 static_assert(sizeof(IocCtrlEventKill) == 8, "IocCtrlEventKill is incorrect size");
124 122
125 NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); 123 NvResult NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
126 NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async, 124 NvResult IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async);
127 IoctlCtrl& ctrl);
128 NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); 125 NvResult IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
129 NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output); 126 NvResult IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
130 NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); 127 NvResult IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 647f5907e..0320d3ae2 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -16,7 +16,7 @@ nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {}
16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; 16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
17 17
18NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, 18NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
19 std::vector<u8>& output, IoctlCtrl& ctrl) { 19 std::vector<u8>& output) {
20 switch (command.group) { 20 switch (command.group) {
21 case 'G': 21 case 'G':
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -48,15 +48,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input,
48} 48}
49 49
50NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, 50NvResult nvhost_ctrl_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
51 const std::vector<u8>& inline_input, std::vector<u8>& output, 51 const std::vector<u8>& inline_input, std::vector<u8>& output) {
52 IoctlCtrl& ctrl) {
53 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 52 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
54 return NvResult::NotImplemented; 53 return NvResult::NotImplemented;
55} 54}
56 55
57NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, 56NvResult nvhost_ctrl_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input,
58 std::vector<u8>& output, std::vector<u8>& inline_output, 57 std::vector<u8>& output, std::vector<u8>& inline_output) {
59 IoctlCtrl& ctrl) {
60 switch (command.group) { 58 switch (command.group) {
61 case 'G': 59 case 'G':
62 switch (command.cmd) { 60 switch (command.cmd) {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index c2fffe734..137b88238 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -16,13 +16,11 @@ public:
16 explicit nvhost_ctrl_gpu(Core::System& system); 16 explicit nvhost_ctrl_gpu(Core::System& system);
17 ~nvhost_ctrl_gpu() override; 17 ~nvhost_ctrl_gpu() override;
18 18
19 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 19 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
20 IoctlCtrl& ctrl) override;
21 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 20 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
22 const std::vector<u8>& inline_input, std::vector<u8>& output, 21 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
23 IoctlCtrl& ctrl) override;
24 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 22 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
25 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 23 std::vector<u8>& inline_output) override;
26 24
27private: 25private:
28 struct IoctlGpuCharacteristics { 26 struct IoctlGpuCharacteristics {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index b0c2caba5..af8b3d9f1 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -23,8 +23,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
23 23
24nvhost_gpu::~nvhost_gpu() = default; 24nvhost_gpu::~nvhost_gpu() = default;
25 25
26NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 26NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
27 IoctlCtrl& ctrl) {
28 switch (command.group) { 27 switch (command.group) {
29 case 0x0: 28 case 0x0:
30 switch (command.cmd) { 29 switch (command.cmd) {
@@ -76,8 +75,7 @@ NvResult nvhost_gpu::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
76}; 75};
77 76
78NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input, 77NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
79 const std::vector<u8>& inline_input, std::vector<u8>& output, 78 const std::vector<u8>& inline_input, std::vector<u8>& output) {
80 IoctlCtrl& ctrl) {
81 switch (command.group) { 79 switch (command.group) {
82 case 'H': 80 case 'H':
83 switch (command.cmd) { 81 switch (command.cmd) {
@@ -91,7 +89,7 @@ NvResult nvhost_gpu::Ioctl2(Ioctl command, const std::vector<u8>& input,
91} 89}
92 90
93NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 91NvResult nvhost_gpu::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
94 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 92 std::vector<u8>& inline_output) {
95 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 93 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
96 return NvResult::NotImplemented; 94 return NvResult::NotImplemented;
97} 95}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index aa0048a9d..e0298b4fe 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -26,13 +26,11 @@ public:
26 SyncpointManager& syncpoint_manager); 26 SyncpointManager& syncpoint_manager);
27 ~nvhost_gpu() override; 27 ~nvhost_gpu() override;
28 28
29 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 29 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
30 IoctlCtrl& ctrl) override;
31 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 30 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
32 const std::vector<u8>& inline_input, std::vector<u8>& output, 31 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
33 IoctlCtrl& ctrl) override;
34 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 32 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
35 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 33 std::vector<u8>& inline_output) override;
36 34
37private: 35private:
38 enum class CtxObjects : u32_le { 36 enum class CtxObjects : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index b8328c314..36970f828 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -11,12 +11,13 @@
11 11
12namespace Service::Nvidia::Devices { 12namespace Service::Nvidia::Devices {
13 13
14nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) 14nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
15 : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} 15 SyncpointManager& syncpoint_manager)
16 : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
16nvhost_nvdec::~nvhost_nvdec() = default; 17nvhost_nvdec::~nvhost_nvdec() = default;
17 18
18NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 19NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
19 IoctlCtrl& ctrl) { 20 std::vector<u8>& output) {
20 switch (command.group) { 21 switch (command.group) {
21 case 0x0: 22 case 0x0:
22 switch (command.cmd) { 23 switch (command.cmd) {
@@ -58,14 +59,13 @@ NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input, std::
58} 59}
59 60
60NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input, 61NvResult nvhost_nvdec::Ioctl2(Ioctl command, const std::vector<u8>& input,
61 const std::vector<u8>& inline_input, std::vector<u8>& output, 62 const std::vector<u8>& inline_input, std::vector<u8>& output) {
62 IoctlCtrl& ctrl) {
63 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 63 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
64 return NvResult::NotImplemented; 64 return NvResult::NotImplemented;
65} 65}
66 66
67NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 67NvResult nvhost_nvdec::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
68 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 68 std::vector<u8>& inline_output) {
69 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 69 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
70 return NvResult::NotImplemented; 70 return NvResult::NotImplemented;
71} 71}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 884ed6c5b..77ef53cdd 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -11,16 +11,15 @@ namespace Service::Nvidia::Devices {
11 11
12class nvhost_nvdec final : public nvhost_nvdec_common { 12class nvhost_nvdec final : public nvhost_nvdec_common {
13public: 13public:
14 explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 14 explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
15 SyncpointManager& syncpoint_manager);
15 ~nvhost_nvdec() override; 16 ~nvhost_nvdec() override;
16 17
17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 18 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
18 IoctlCtrl& ctrl) override;
19 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 19 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
20 const std::vector<u8>& inline_input, std::vector<u8>& output, 20 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
21 IoctlCtrl& ctrl) override;
22 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 21 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
23 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 22 std::vector<u8>& inline_output) override;
24}; 23};
25 24
26} // namespace Service::Nvidia::Devices 25} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index b49cecb42..4898dc27a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -11,6 +11,7 @@
11#include "core/core.h" 11#include "core/core.h"
12#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" 12#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
13#include "core/hle/service/nvdrv/devices/nvmap.h" 13#include "core/hle/service/nvdrv/devices/nvmap.h"
14#include "core/hle/service/nvdrv/syncpoint_manager.h"
14#include "core/memory.h" 15#include "core/memory.h"
15#include "video_core/memory_manager.h" 16#include "video_core/memory_manager.h"
16#include "video_core/renderer_base.h" 17#include "video_core/renderer_base.h"
@@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
36} 37}
37} // Anonymous namespace 38} // Anonymous namespace
38 39
39nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) 40nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
40 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} 41 SyncpointManager& syncpoint_manager)
42 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {}
41nvhost_nvdec_common::~nvhost_nvdec_common() = default; 43nvhost_nvdec_common::~nvhost_nvdec_common() = default;
42 44
43NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) { 45NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
@@ -71,10 +73,15 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
71 offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset); 73 offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
72 offset = SpliceVectors(input, fences, params.fence_count, offset); 74 offset = SpliceVectors(input, fences, params.fence_count, offset);
73 75
74 // TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment
75
76 auto& gpu = system.GPU(); 76 auto& gpu = system.GPU();
77 77 if (gpu.UseNvdec()) {
78 for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
79 const SyncptIncr& syncpt_incr = syncpt_increments[i];
80 fences[i].id = syncpt_incr.id;
81 fences[i].value =
82 syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
83 }
84 }
78 for (const auto& cmd_buffer : command_buffers) { 85 for (const auto& cmd_buffer : command_buffers) {
79 auto object = nvmap_dev->GetObject(cmd_buffer.memory_id); 86 auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
80 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); 87 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
@@ -89,7 +96,13 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
89 cmdlist.size() * sizeof(u32)); 96 cmdlist.size() * sizeof(u32));
90 gpu.PushCommandBuffer(cmdlist); 97 gpu.PushCommandBuffer(cmdlist);
91 } 98 }
99 if (gpu.UseNvdec()) {
92 100
101 fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
102
103 Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
104 gpu.PushCommandBuffer(cmdlist);
105 }
93 std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); 106 std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
94 // Some games expect command_buffers to be written back 107 // Some games expect command_buffers to be written back
95 offset = sizeof(IoctlSubmit); 108 offset = sizeof(IoctlSubmit);
@@ -98,6 +111,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
98 offset = WriteVectors(output, reloc_shifts, offset); 111 offset = WriteVectors(output, reloc_shifts, offset);
99 offset = WriteVectors(output, syncpt_increments, offset); 112 offset = WriteVectors(output, syncpt_increments, offset);
100 offset = WriteVectors(output, wait_checks, offset); 113 offset = WriteVectors(output, wait_checks, offset);
114 offset = WriteVectors(output, fences, offset);
101 115
102 return NvResult::Success; 116 return NvResult::Success;
103} 117}
@@ -107,9 +121,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
107 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); 121 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
108 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); 122 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
109 123
110 // We found that implementing this causes deadlocks with async gpu, along with degraded 124 if (device_syncpoints[params.param] == 0 && system.GPU().UseNvdec()) {
111 // performance. TODO: RE the nvdec async implementation 125 device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
112 params.value = 0; 126 }
127 params.value = device_syncpoints[params.param];
113 std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint)); 128 std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
114 129
115 return NvResult::Success; 130 return NvResult::Success;
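The Submit path above now reserves syncpoint values up front when nvdec is active: each requested increment bumps the expected maximum through SyncpointManager, and the resulting fence values are written back to the guest so it knows what to wait on. A self-contained sketch of that bookkeeping — SyncpointManagerSketch is a stand-in counter table, not yuzu's class, and the 192-entry size assumes MaxSyncPoints:

#include <array>
#include <cstdint>
#include <vector>

struct SyncptIncr { std::uint32_t id{}; std::uint32_t increments{}; };
struct Fence { std::uint32_t id{}; std::uint32_t value{}; };

// Stand-in counter table; yuzu's SyncpointManager also talks to the GPU.
class SyncpointManagerSketch {
public:
    // Returns the new expected maximum value for the syncpoint.
    std::uint32_t IncreaseSyncpoint(std::uint32_t id, std::uint32_t amount) {
        return counters.at(id) += amount;
    }

private:
    std::array<std::uint32_t, 192> counters{}; // 192 assumed = MaxSyncPoints
};

// Mirror of the loop in Submit: each increment reserves future syncpoint
// values, and the fence written back tells the guest what value to wait on.
// Precondition: fences.size() >= increments.size().
void FillFences(SyncpointManagerSketch& manager,
                const std::vector<SyncptIncr>& increments,
                std::vector<Fence>& fences) {
    for (std::size_t i = 0; i < increments.size(); ++i) {
        fences[i].id = increments[i].id;
        fences[i].value =
            manager.IncreaseSyncpoint(increments[i].id, increments[i].increments);
    }
}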
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index d9f95ba58..4c9d4ba41 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -10,12 +10,16 @@
10#include "common/swap.h" 10#include "common/swap.h"
11#include "core/hle/service/nvdrv/devices/nvdevice.h" 11#include "core/hle/service/nvdrv/devices/nvdevice.h"
12 12
13namespace Service::Nvidia::Devices { 13namespace Service::Nvidia {
14class SyncpointManager;
15
16namespace Devices {
14class nvmap; 17class nvmap;
15 18
16class nvhost_nvdec_common : public nvdevice { 19class nvhost_nvdec_common : public nvdevice {
17public: 20public:
18 explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 21 explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
22 SyncpointManager& syncpoint_manager);
19 ~nvhost_nvdec_common() override; 23 ~nvhost_nvdec_common() override;
20 24
21protected: 25protected:
@@ -157,8 +161,10 @@ protected:
157 s32_le nvmap_fd{}; 161 s32_le nvmap_fd{};
158 u32_le submit_timeout{}; 162 u32_le submit_timeout{};
159 std::shared_ptr<nvmap> nvmap_dev; 163 std::shared_ptr<nvmap> nvmap_dev;
160 164 SyncpointManager& syncpoint_manager;
165 std::array<u32, MaxSyncPoints> device_syncpoints{};
161 // This is expected to be ordered, therefore we must use a map, not unordered_map 166 // This is expected to be ordered, therefore we must use a map, not unordered_map
162 std::map<GPUVAddr, BufferMap> buffer_mappings; 167 std::map<GPUVAddr, BufferMap> buffer_mappings;
163}; 168};
164}; // namespace Service::Nvidia::Devices 169}; // namespace Devices
170} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 6f4ab0ab3..2d06955c0 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -13,8 +13,8 @@ namespace Service::Nvidia::Devices {
13nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {} 13nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {}
14nvhost_nvjpg::~nvhost_nvjpg() = default; 14nvhost_nvjpg::~nvhost_nvjpg() = default;
15 15
16NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 16NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input,
17 IoctlCtrl& ctrl) { 17 std::vector<u8>& output) {
18 switch (command.group) { 18 switch (command.group) {
19 case 'H': 19 case 'H':
20 switch (command.cmd) { 20 switch (command.cmd) {
@@ -33,14 +33,13 @@ NvResult nvhost_nvjpg::Ioctl1(Ioctl command, const std::vector<u8>& input, std::
33} 33}
34 34
35NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input, 35NvResult nvhost_nvjpg::Ioctl2(Ioctl command, const std::vector<u8>& input,
36 const std::vector<u8>& inline_input, std::vector<u8>& output, 36 const std::vector<u8>& inline_input, std::vector<u8>& output) {
37 IoctlCtrl& ctrl) {
38 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 37 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
39 return NvResult::NotImplemented; 38 return NvResult::NotImplemented;
40} 39}
41 40
42NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 41NvResult nvhost_nvjpg::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
43 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 42 std::vector<u8>& inline_output) {
44 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 43 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
45 return NvResult::NotImplemented; 44 return NvResult::NotImplemented;
46} 45}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 6fb99d959..43948d18d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -16,13 +16,11 @@ public:
16 explicit nvhost_nvjpg(Core::System& system); 16 explicit nvhost_nvjpg(Core::System& system);
17 ~nvhost_nvjpg() override; 17 ~nvhost_nvjpg() override;
18 18
19 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 19 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
20 IoctlCtrl& ctrl) override;
21 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 20 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
22 const std::vector<u8>& inline_input, std::vector<u8>& output, 21 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
23 IoctlCtrl& ctrl) override;
24 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 22 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
25 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 23 std::vector<u8>& inline_output) override;
26 24
27private: 25private:
28 struct IoctlSetNvmapFD { 26 struct IoctlSetNvmapFD {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index 55a17f423..72499654c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -10,13 +10,13 @@
10#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
11 11
12namespace Service::Nvidia::Devices { 12namespace Service::Nvidia::Devices {
13nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) 13nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
14 : nvhost_nvdec_common(system, std::move(nvmap_dev)) {} 14 SyncpointManager& syncpoint_manager)
15 : nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
15 16
16nvhost_vic::~nvhost_vic() = default; 17nvhost_vic::~nvhost_vic() = default;
17 18
18NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 19NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
19 IoctlCtrl& ctrl) {
20 switch (command.group) { 20 switch (command.group) {
21 case 0x0: 21 case 0x0:
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -51,14 +51,13 @@ NvResult nvhost_vic::Ioctl1(Ioctl command, const std::vector<u8>& input, std::ve
51} 51}
52 52
53NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input, 53NvResult nvhost_vic::Ioctl2(Ioctl command, const std::vector<u8>& input,
54 const std::vector<u8>& inline_input, std::vector<u8>& output, 54 const std::vector<u8>& inline_input, std::vector<u8>& output) {
55 IoctlCtrl& ctrl) {
56 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 55 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
57 return NvResult::NotImplemented; 56 return NvResult::NotImplemented;
58} 57}
59 58
60NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 59NvResult nvhost_vic::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
61 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 60 std::vector<u8>& inline_output) {
62 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 61 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
63 return NvResult::NotImplemented; 62 return NvResult::NotImplemented;
64} 63}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index 7f4858cd4..f401c61fa 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -7,19 +7,17 @@
7#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h" 7#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
8 8
9namespace Service::Nvidia::Devices { 9namespace Service::Nvidia::Devices {
10class nvmap;
11 10
12class nvhost_vic final : public nvhost_nvdec_common { 11class nvhost_vic final : public nvhost_nvdec_common {
13public: 12public:
14 explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); 13 explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
14 SyncpointManager& syncpoint_manager);
15 ~nvhost_vic(); 15 ~nvhost_vic();
16 16
17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 17 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
18 IoctlCtrl& ctrl) override;
19 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 18 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
20 const std::vector<u8>& inline_input, std::vector<u8>& output, 19 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
21 IoctlCtrl& ctrl) override;
22 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 20 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
23 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 21 std::vector<u8>& inline_output) override;
24}; 22};
25} // namespace Service::Nvidia::Devices 23} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 910cfee51..4015a2740 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -19,8 +19,7 @@ nvmap::nvmap(Core::System& system) : nvdevice(system) {
19 19
20nvmap::~nvmap() = default; 20nvmap::~nvmap() = default;
21 21
22NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 22NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) {
23 IoctlCtrl& ctrl) {
24 switch (command.group) { 23 switch (command.group) {
25 case 0x1: 24 case 0x1:
26 switch (command.cmd) { 25 switch (command.cmd) {
@@ -49,14 +48,13 @@ NvResult nvmap::Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<
49} 48}
50 49
51NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input, 50NvResult nvmap::Ioctl2(Ioctl command, const std::vector<u8>& input,
52 const std::vector<u8>& inline_input, std::vector<u8>& output, 51 const std::vector<u8>& inline_input, std::vector<u8>& output) {
53 IoctlCtrl& ctrl) {
54 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 52 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
55 return NvResult::NotImplemented; 53 return NvResult::NotImplemented;
56} 54}
57 55
58NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 56NvResult nvmap::Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
59 std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 57 std::vector<u8>& inline_output) {
60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 58 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
61 return NvResult::NotImplemented; 59 return NvResult::NotImplemented;
62} 60}
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index c0c2fa5eb..4484bd79f 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -19,13 +19,11 @@ public:
19 explicit nvmap(Core::System& system); 19 explicit nvmap(Core::System& system);
20 ~nvmap() override; 20 ~nvmap() override;
21 21
22 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 22 NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
23 IoctlCtrl& ctrl) override;
24 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input, 23 NvResult Ioctl2(Ioctl command, const std::vector<u8>& input,
25 const std::vector<u8>& inline_input, std::vector<u8>& output, 24 const std::vector<u8>& inline_input, std::vector<u8>& output) override;
26 IoctlCtrl& ctrl) override;
27 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output, 25 NvResult Ioctl3(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
28 std::vector<u8>& inline_output, IoctlCtrl& ctrl) override; 26 std::vector<u8>& inline_output) override;
29 27
30 /// Returns the allocated address of an nvmap object given its handle. 28 /// Returns the allocated address of an nvmap object given its handle.
31 VAddr GetObjectAddress(u32 handle) const; 29 VAddr GetObjectAddress(u32 handle) const;
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index d72c531f6..cc23b001c 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -61,32 +61,9 @@ void NVDRV::Ioctl1(Kernel::HLERequestContext& ctx) {
61 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 61 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
62 const auto input_buffer = ctx.ReadBuffer(0); 62 const auto input_buffer = ctx.ReadBuffer(0);
63 63
64 IoctlCtrl ctrl{}; 64 const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer);
65 65 if (command.is_out != 0) {
66 const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer, ctrl); 66 ctx.WriteBuffer(output_buffer);
67 if (ctrl.must_delay) {
68 ctrl.fresh_call = false;
69 ctx.SleepClientThread(
70 "NVServices::DelayedResponse", ctrl.timeout,
71 [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
72 Kernel::ThreadWakeupReason reason) {
73 IoctlCtrl ctrl2{ctrl};
74 std::vector<u8> tmp_output = output_buffer;
75 const auto nv_result2 = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output, ctrl2);
76
77 if (command.is_out != 0) {
78 ctx.WriteBuffer(tmp_output);
79 }
80
81 IPC::ResponseBuilder rb{ctx_, 3};
82 rb.Push(RESULT_SUCCESS);
83 rb.PushEnum(nv_result2);
84 },
85 nvdrv->GetEventWriteable(ctrl.event_id));
86 } else {
87 if (command.is_out != 0) {
88 ctx.WriteBuffer(output_buffer);
89 }
90 } 67 }
91 68
92 IPC::ResponseBuilder rb{ctx, 3}; 69 IPC::ResponseBuilder rb{ctx, 3};
@@ -110,36 +87,8 @@ void NVDRV::Ioctl2(Kernel::HLERequestContext& ctx) {
110 const auto input_inlined_buffer = ctx.ReadBuffer(1); 87 const auto input_inlined_buffer = ctx.ReadBuffer(1);
111 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 88 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
112 89
113 IoctlCtrl ctrl{};
114
115 const auto nv_result = 90 const auto nv_result =
116 nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer, ctrl); 91 nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer);
117 if (ctrl.must_delay) {
118 ctrl.fresh_call = false;
119 ctx.SleepClientThread(
120 "NVServices::DelayedResponse", ctrl.timeout,
121 [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
122 Kernel::ThreadWakeupReason reason) {
123 IoctlCtrl ctrl2{ctrl};
124 std::vector<u8> tmp_output = output_buffer;
125 const auto nv_result2 = nvdrv->Ioctl2(fd, command, input_buffer,
126 input_inlined_buffer, tmp_output, ctrl2);
127
128 if (command.is_out != 0) {
129 ctx.WriteBuffer(tmp_output);
130 }
131
132 IPC::ResponseBuilder rb{ctx_, 3};
133 rb.Push(RESULT_SUCCESS);
134 rb.PushEnum(nv_result2);
135 },
136 nvdrv->GetEventWriteable(ctrl.event_id));
137 } else {
138 if (command.is_out != 0) {
139 ctx.WriteBuffer(output_buffer);
140 }
141 }
142
143 if (command.is_out != 0) { 92 if (command.is_out != 0) {
144 ctx.WriteBuffer(output_buffer); 93 ctx.WriteBuffer(output_buffer);
145 } 94 }
@@ -165,36 +114,11 @@ void NVDRV::Ioctl3(Kernel::HLERequestContext& ctx) {
165 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 114 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0));
166 std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); 115 std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1));
167 116
168 IoctlCtrl ctrl{};
169 const auto nv_result = 117 const auto nv_result =
170 nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline, ctrl); 118 nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
171 if (ctrl.must_delay) { 119 if (command.is_out != 0) {
172 ctrl.fresh_call = false; 120 ctx.WriteBuffer(output_buffer, 0);
173 ctx.SleepClientThread( 121 ctx.WriteBuffer(output_buffer_inline, 1);
174 "NVServices::DelayedResponse", ctrl.timeout,
175 [=, this](std::shared_ptr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx_,
176 Kernel::ThreadWakeupReason reason) {
177 IoctlCtrl ctrl2{ctrl};
178 std::vector<u8> tmp_output = output_buffer;
179 std::vector<u8> tmp_output2 = output_buffer;
180 const auto nv_result2 =
181 nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output2, ctrl2);
182
183 if (command.is_out != 0) {
184 ctx.WriteBuffer(tmp_output, 0);
185 ctx.WriteBuffer(tmp_output2, 1);
186 }
187
188 IPC::ResponseBuilder rb{ctx_, 3};
189 rb.Push(RESULT_SUCCESS);
190 rb.PushEnum(nv_result2);
191 },
192 nvdrv->GetEventWriteable(ctrl.event_id));
193 } else {
194 if (command.is_out != 0) {
195 ctx.WriteBuffer(output_buffer, 0);
196 ctx.WriteBuffer(output_buffer_inline, 1);
197 }
198 } 122 }
199 123
200 IPC::ResponseBuilder rb{ctx, 3}; 124 IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h
index a3c4ecd85..3294bc0e7 100644
--- a/src/core/hle/service/nvdrv/nvdata.h
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -97,15 +97,4 @@ union Ioctl {
97 BitField<31, 1, u32> is_out; 97 BitField<31, 1, u32> is_out;
98}; 98};
99 99
100struct IoctlCtrl {
101 // First call done to the servioce for services that call itself again after a call.
102 bool fresh_call{true};
103 // Tells the Ioctl Wrapper that it must delay the IPC response and send the thread to sleep
104 bool must_delay{};
105 // Timeout for the delay
106 s64 timeout{};
107 // NV Event Id
108 s32 event_id{-1};
109};
110
111} // namespace Service::Nvidia 100} // namespace Service::Nvidia
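IoctlCtrl existed solely to carry deferral state (fresh_call, must_delay, timeout, event_id) between the service wrapper and a device that wanted its response delayed. With the wrappers above reduced to a single synchronous call-and-reply, the struct has no remaining consumers. A stand-alone sketch of the simplified wrapper shape — all types here are stand-ins, not yuzu's HLERequestContext or device classes:

#include <cstdint>
#include <vector>

enum class NvResult : std::uint32_t { Success = 0 };
using Buffer = std::vector<std::uint8_t>;

// Stand-in for an HLE request context that can write a response buffer.
struct ContextSketch {
    Buffer response;
    void WriteBuffer(const Buffer& data) { response = data; }
};

// Stand-in device entry point; echoes the input for illustration.
NvResult DeviceIoctl(const Buffer& input, Buffer& output) {
    output.assign(input.begin(), input.end());
    return NvResult::Success;
}

// One call, one reply: no fresh_call flag, no must_delay round trip, and no
// re-invocation from a wakeup callback as in the removed SleepClientThread path.
NvResult HandleIoctl(ContextSketch& ctx, const Buffer& input, bool is_out) {
    Buffer output(input.size());
    const NvResult result = DeviceIoctl(input, output);
    if (is_out) {
        ctx.WriteBuffer(output);
    }
    return result;
}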
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 8e0c9f093..620c18728 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
55 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); 55 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
56 devices["/dev/nvhost-ctrl"] = 56 devices["/dev/nvhost-ctrl"] =
57 std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); 57 std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
58 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); 58 devices["/dev/nvhost-nvdec"] =
59 std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
59 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); 60 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
60 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); 61 devices["/dev/nvhost-vic"] =
62 std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
61} 63}
62 64
63Module::~Module() = default; 65Module::~Module() = default;
@@ -91,7 +93,7 @@ DeviceFD Module::Open(const std::string& device_name) {
91} 93}
92 94
93NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, 95NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
94 std::vector<u8>& output, IoctlCtrl& ctrl) { 96 std::vector<u8>& output) {
95 if (fd < 0) { 97 if (fd < 0) {
96 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 98 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
97 return NvResult::InvalidState; 99 return NvResult::InvalidState;
@@ -104,12 +106,11 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input
104 return NvResult::NotImplemented; 106 return NvResult::NotImplemented;
105 } 107 }
106 108
107 return itr->second->Ioctl1(command, input, output, ctrl); 109 return itr->second->Ioctl1(command, input, output);
108} 110}
109 111
110NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, 112NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
111 const std::vector<u8>& inline_input, std::vector<u8>& output, 113 const std::vector<u8>& inline_input, std::vector<u8>& output) {
112 IoctlCtrl& ctrl) {
113 if (fd < 0) { 114 if (fd < 0) {
114 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 115 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
115 return NvResult::InvalidState; 116 return NvResult::InvalidState;
@@ -122,11 +123,11 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input
122 return NvResult::NotImplemented; 123 return NvResult::NotImplemented;
123 } 124 }
124 125
125 return itr->second->Ioctl2(command, input, inline_input, output, ctrl); 126 return itr->second->Ioctl2(command, input, inline_input, output);
126} 127}
127 128
128NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, 129NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
129 std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl) { 130 std::vector<u8>& output, std::vector<u8>& inline_output) {
130 if (fd < 0) { 131 if (fd < 0) {
131 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 132 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
132 return NvResult::InvalidState; 133 return NvResult::InvalidState;
@@ -139,7 +140,7 @@ NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input
139 return NvResult::NotImplemented; 140 return NvResult::NotImplemented;
140 } 141 }
141 142
142 return itr->second->Ioctl3(command, input, output, inline_output, ctrl); 143 return itr->second->Ioctl3(command, input, output, inline_output);
143} 144}
144 145
145NvResult Module::Close(DeviceFD fd) { 146NvResult Module::Close(DeviceFD fd) {
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 5985d2179..144e657e5 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -119,13 +119,13 @@ public:
119 119
120 /// Sends an ioctl command to the specified file descriptor. 120 /// Sends an ioctl command to the specified file descriptor.
121 NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input, 121 NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
122 std::vector<u8>& output, IoctlCtrl& ctrl); 122 std::vector<u8>& output);
123 123
124 NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input, 124 NvResult Ioctl2(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
125 const std::vector<u8>& inline_input, std::vector<u8>& output, IoctlCtrl& ctrl); 125 const std::vector<u8>& inline_input, std::vector<u8>& output);
126 126
127 NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input, 127 NvResult Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
128 std::vector<u8>& output, std::vector<u8>& inline_output, IoctlCtrl& ctrl); 128 std::vector<u8>& output, std::vector<u8>& inline_output);
129 129
130 /// Closes a device file descriptor and returns operation success. 130 /// Closes a device file descriptor and returns operation success.
131 NvResult Close(DeviceFD fd); 131 NvResult Close(DeviceFD fd);
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 377f47e8e..5578181a4 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -25,7 +25,12 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
25 ASSERT(slot < buffer_slots); 25 ASSERT(slot < buffer_slots);
26 LOG_WARNING(Service, "Adding graphics buffer {}", slot); 26 LOG_WARNING(Service, "Adding graphics buffer {}", slot);
27 27
28 free_buffers.push_back(slot); 28 {
29 std::unique_lock lock{free_buffers_mutex};
30 free_buffers.push_back(slot);
31 }
32 free_buffers_condition.notify_one();
33
29 buffers[slot] = { 34 buffers[slot] = {
30 .slot = slot, 35 .slot = slot,
31 .status = Buffer::Status::Free, 36 .status = Buffer::Status::Free,
@@ -41,10 +46,20 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
41 46
42std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, 47std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
43 u32 height) { 48 u32 height) {
49 // Wait until a buffer is freed (or the queue disconnects) before dequeuing
50 {
51 std::unique_lock lock{free_buffers_mutex};
52 free_buffers_condition.wait(lock, [this] { return !free_buffers.empty() || !is_connect; });
53 }
44 54
45 if (free_buffers.empty()) { 55 if (!is_connect) {
56 // Buffer was disconnected while the thread was blocked, this is most likely due to
57 // emulation being stopped
46 return std::nullopt; 58 return std::nullopt;
47 } 59 }
60
61 std::unique_lock lock{free_buffers_mutex};
62
48 auto f_itr = free_buffers.begin(); 63 auto f_itr = free_buffers.begin();
49 auto slot = buffers.size(); 64 auto slot = buffers.size();
50 65
@@ -85,6 +100,7 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
85 buffers[slot].crop_rect = crop_rect; 100 buffers[slot].crop_rect = crop_rect;
86 buffers[slot].swap_interval = swap_interval; 101 buffers[slot].swap_interval = swap_interval;
87 buffers[slot].multi_fence = multi_fence; 102 buffers[slot].multi_fence = multi_fence;
103 std::unique_lock lock{queue_sequence_mutex};
88 queue_sequence.push_back(slot); 104 queue_sequence.push_back(slot);
89} 105}
90 106
@@ -97,12 +113,17 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult
97 buffers[slot].multi_fence = multi_fence; 113 buffers[slot].multi_fence = multi_fence;
98 buffers[slot].swap_interval = 0; 114 buffers[slot].swap_interval = 0;
99 115
100 free_buffers.push_back(slot); 116 {
117 std::unique_lock lock{free_buffers_mutex};
118 free_buffers.push_back(slot);
119 }
120 free_buffers_condition.notify_one();
101 121
102 buffer_wait_event.writable->Signal(); 122 buffer_wait_event.writable->Signal();
103} 123}
104 124
105std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { 125std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
126 std::unique_lock lock{queue_sequence_mutex};
106 std::size_t buffer_slot = buffers.size(); 127 std::size_t buffer_slot = buffers.size();
107 // Iterate to find a queued buffer matching the requested slot. 128 // Iterate to find a queued buffer matching the requested slot.
108 while (buffer_slot == buffers.size() && !queue_sequence.empty()) { 129 while (buffer_slot == buffers.size() && !queue_sequence.empty()) {
@@ -127,15 +148,30 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
127 ASSERT(buffers[slot].slot == slot); 148 ASSERT(buffers[slot].slot == slot);
128 149
129 buffers[slot].status = Buffer::Status::Free; 150 buffers[slot].status = Buffer::Status::Free;
130 free_buffers.push_back(slot); 151 {
152 std::unique_lock lock{free_buffers_mutex};
153 free_buffers.push_back(slot);
154 }
155 free_buffers_condition.notify_one();
131 156
132 buffer_wait_event.writable->Signal(); 157 buffer_wait_event.writable->Signal();
133} 158}
134 159
160void BufferQueue::Connect() {
161 std::unique_lock lock{queue_sequence_mutex};
162 queue_sequence.clear();
163 is_connect = true;
164}
165
135void BufferQueue::Disconnect() { 166void BufferQueue::Disconnect() {
136 buffers.fill({}); 167 buffers.fill({});
137 queue_sequence.clear(); 168 {
169 std::unique_lock lock{queue_sequence_mutex};
170 queue_sequence.clear();
171 }
138 buffer_wait_event.writable->Signal(); 172 buffer_wait_event.writable->Signal();
173 is_connect = false;
174 free_buffers_condition.notify_one();
139} 175}
140 176
141u32 BufferQueue::Query(QueryType type) { 177u32 BufferQueue::Query(QueryType type) {
@@ -144,9 +180,11 @@ u32 BufferQueue::Query(QueryType type) {
144 switch (type) { 180 switch (type) {
145 case QueryType::NativeWindowFormat: 181 case QueryType::NativeWindowFormat:
146 return static_cast<u32>(PixelFormat::RGBA8888); 182 return static_cast<u32>(PixelFormat::RGBA8888);
183 case QueryType::NativeWindowWidth:
184 case QueryType::NativeWindowHeight:
185 break;
147 } 186 }
148 187 UNIMPLEMENTED_MSG("Unimplemented query type={}", type);
149 UNIMPLEMENTED();
150 return 0; 188 return 0;
151} 189}
152 190
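The BufferQueue changes above replace the old empty-check in DequeueBuffer with a condition-variable wait that Disconnect also releases, so a consumer blocked during shutdown wakes up and bails out instead of hanging. Note that the diff takes free_buffers_mutex twice — once for the wait, once later for the pop; the stand-alone sketch below folds both into a single critical section (plain standard-library types, no yuzu classes):

#include <condition_variable>
#include <cstdint>
#include <list>
#include <mutex>
#include <optional>

class FreeListSketch {
public:
    // Producer side: SetPreallocatedBuffer/CancelBuffer/ReleaseBuffer all
    // reduce to "push a slot, wake one waiter".
    void Push(std::uint32_t slot) {
        {
            std::unique_lock lock{mutex};
            slots.push_back(slot);
        }
        condition.notify_one();
    }

    // Consumer side: block until a slot is free or the queue disconnects,
    // holding the lock across both the wait and the pop.
    std::optional<std::uint32_t> PopBlocking() {
        std::unique_lock lock{mutex};
        condition.wait(lock, [this] { return !slots.empty() || !connected; });
        if (!connected) {
            return std::nullopt; // disconnected while blocked, e.g. shutdown
        }
        const std::uint32_t slot = slots.front();
        slots.pop_front();
        return slot;
    }

    // Shutdown side: what Disconnect does, so blocked consumers bail out.
    void Disconnect() {
        {
            std::unique_lock lock{mutex};
            connected = false;
        }
        condition.notify_all();
    }

private:
    std::list<std::uint32_t> slots;
    bool connected{true};
    std::mutex mutex;
    std::condition_variable condition;
};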
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index e610923cb..ad7469277 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -4,7 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <condition_variable>
7#include <list> 8#include <list>
9#include <mutex>
8#include <optional> 10#include <optional>
9#include <vector> 11#include <vector>
10 12
@@ -99,6 +101,7 @@ public:
99 void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence); 101 void CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& multi_fence);
100 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 102 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
101 void ReleaseBuffer(u32 slot); 103 void ReleaseBuffer(u32 slot);
104 void Connect();
102 void Disconnect(); 105 void Disconnect();
103 u32 Query(QueryType type); 106 u32 Query(QueryType type);
104 107
@@ -106,18 +109,30 @@ public:
106 return id; 109 return id;
107 } 110 }
108 111
112 bool IsConnected() const {
113 return is_connect;
114 }
115
109 std::shared_ptr<Kernel::WritableEvent> GetWritableBufferWaitEvent() const; 116 std::shared_ptr<Kernel::WritableEvent> GetWritableBufferWaitEvent() const;
110 117
111 std::shared_ptr<Kernel::ReadableEvent> GetBufferWaitEvent() const; 118 std::shared_ptr<Kernel::ReadableEvent> GetBufferWaitEvent() const;
112 119
113private: 120private:
114 u32 id; 121 BufferQueue(const BufferQueue&) = delete;
115 u64 layer_id; 122
123 u32 id{};
124 u64 layer_id{};
125 std::atomic_bool is_connect{};
116 126
117 std::list<u32> free_buffers; 127 std::list<u32> free_buffers;
118 std::array<Buffer, buffer_slots> buffers; 128 std::array<Buffer, buffer_slots> buffers;
119 std::list<u32> queue_sequence; 129 std::list<u32> queue_sequence;
120 Kernel::EventPair buffer_wait_event; 130 Kernel::EventPair buffer_wait_event;
131
132 std::mutex free_buffers_mutex;
133 std::condition_variable free_buffers_condition;
134
135 std::mutex queue_sequence_mutex;
121}; 136};
122 137
123} // namespace Service::NVFlinger 138} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 44aa2bdae..4b3581949 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -88,6 +88,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
88} 88}
89 89
90NVFlinger::~NVFlinger() { 90NVFlinger::~NVFlinger() {
91 for (auto& buffer_queue : buffer_queues) {
92 buffer_queue->Disconnect();
93 }
94
91 if (system.IsMulticore()) { 95 if (system.IsMulticore()) {
92 is_running = false; 96 is_running = false;
93 wait_event->Set(); 97 wait_event->Set();
@@ -104,6 +108,8 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
104} 108}
105 109
106std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { 110std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
111 const auto guard = Lock();
112
107 LOG_DEBUG(Service, "Opening \"{}\" display", name); 113 LOG_DEBUG(Service, "Opening \"{}\" display", name);
108 114
109 // TODO(Subv): Currently we only support the Default display. 115 // TODO(Subv): Currently we only support the Default display.
@@ -121,6 +127,7 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
121} 127}
122 128
123std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { 129std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
130 const auto guard = Lock();
124 auto* const display = FindDisplay(display_id); 131 auto* const display = FindDisplay(display_id);
125 132
126 if (display == nullptr) { 133 if (display == nullptr) {
@@ -129,18 +136,22 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
129 136
130 const u64 layer_id = next_layer_id++; 137 const u64 layer_id = next_layer_id++;
131 const u32 buffer_queue_id = next_buffer_queue_id++; 138 const u32 buffer_queue_id = next_buffer_queue_id++;
132 buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id); 139 buffer_queues.emplace_back(
133 display->CreateLayer(layer_id, buffer_queues.back()); 140 std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id));
141 display->CreateLayer(layer_id, *buffer_queues.back());
134 return layer_id; 142 return layer_id;
135} 143}
136 144
137void NVFlinger::CloseLayer(u64 layer_id) { 145void NVFlinger::CloseLayer(u64 layer_id) {
146 const auto guard = Lock();
147
138 for (auto& display : displays) { 148 for (auto& display : displays) {
139 display.CloseLayer(layer_id); 149 display.CloseLayer(layer_id);
140 } 150 }
141} 151}
142 152
143std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { 153std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
154 const auto guard = Lock();
144 const auto* const layer = FindLayer(display_id, layer_id); 155 const auto* const layer = FindLayer(display_id, layer_id);
145 156
146 if (layer == nullptr) { 157 if (layer == nullptr) {
@@ -151,6 +162,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
151} 162}
152 163
153std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { 164std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
165 const auto guard = Lock();
154 auto* const display = FindDisplay(display_id); 166 auto* const display = FindDisplay(display_id);
155 167
156 if (display == nullptr) { 168 if (display == nullptr) {
@@ -160,20 +172,16 @@ std::shared_ptr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id)
160 return display->GetVSyncEvent(); 172 return display->GetVSyncEvent();
161} 173}
162 174
163BufferQueue& NVFlinger::FindBufferQueue(u32 id) { 175BufferQueue* NVFlinger::FindBufferQueue(u32 id) {
176 const auto guard = Lock();
164 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), 177 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
165 [id](const auto& queue) { return queue.GetId() == id; }); 178 [id](const auto& queue) { return queue->GetId() == id; });
166 179
167 ASSERT(itr != buffer_queues.end()); 180 if (itr == buffer_queues.end()) {
168 return *itr; 181 return nullptr;
169} 182 }
170
171const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
172 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
173 [id](const auto& queue) { return queue.GetId() == id; });
174 183
175 ASSERT(itr != buffer_queues.end()); 184 return itr->get();
176 return *itr;
177} 185}
178 186
179VI::Display* NVFlinger::FindDisplay(u64 display_id) { 187VI::Display* NVFlinger::FindDisplay(u64 display_id) {
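Two related changes land in this file: buffer queues move from std::vector<BufferQueue> to std::vector<std::unique_ptr<BufferQueue>>, and FindBufferQueue returns a nullable pointer instead of asserting. The unique_ptr indirection matters because display layers keep a reference to their queue; with a plain vector, the emplace_back in CreateLayer could reallocate and invalidate every previously handed-out reference. A minimal, self-contained sketch of that pointer-stability property:

#include <cassert>
#include <memory>
#include <vector>

struct Queue {
    int id;
};

int main() {
    std::vector<std::unique_ptr<Queue>> queues;
    queues.push_back(std::make_unique<Queue>(Queue{1}));
    Queue* const first = queues.front().get();

    // Growing the vector may reallocate its internal array of unique_ptrs...
    for (int i = 2; i < 100; ++i) {
        queues.push_back(std::make_unique<Queue>(Queue{i}));
    }

    // ...but the heap-allocated Queue objects themselves never move.
    assert(queues.front().get() == first);
    return 0;
}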
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 1ebe949c0..c6765259f 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -75,10 +75,7 @@ public:
75 [[nodiscard]] std::shared_ptr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; 75 [[nodiscard]] std::shared_ptr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
76 76
77 /// Obtains a buffer queue identified by the ID. 77 /// Obtains a buffer queue identified by the ID.
78 [[nodiscard]] BufferQueue& FindBufferQueue(u32 id); 78 [[nodiscard]] BufferQueue* FindBufferQueue(u32 id);
79
80 /// Obtains a buffer queue identified by the ID.
81 [[nodiscard]] const BufferQueue& FindBufferQueue(u32 id) const;
82 79
83 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when 80 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
84 /// finished. 81 /// finished.
@@ -86,11 +83,11 @@ public:
86 83
87 [[nodiscard]] s64 GetNextTicks() const; 84 [[nodiscard]] s64 GetNextTicks() const;
88 85
86private:
89 [[nodiscard]] std::unique_lock<std::mutex> Lock() const { 87 [[nodiscard]] std::unique_lock<std::mutex> Lock() const {
90 return std::unique_lock{*guard}; 88 return std::unique_lock{*guard};
91 } 89 }
92 90
93private:
94 /// Finds the display identified by the specified ID. 91 /// Finds the display identified by the specified ID.
95 [[nodiscard]] VI::Display* FindDisplay(u64 display_id); 92 [[nodiscard]] VI::Display* FindDisplay(u64 display_id);
96 93
@@ -110,7 +107,7 @@ private:
110 std::shared_ptr<Nvidia::Module> nvdrv; 107 std::shared_ptr<Nvidia::Module> nvdrv;
111 108
112 std::vector<VI::Display> displays; 109 std::vector<VI::Display> displays;
113 std::vector<BufferQueue> buffer_queues; 110 std::vector<std::unique_ptr<BufferQueue>> buffer_queues;
114 111
115 /// Id to use for the next layer that is created, this counter is shared among all displays. 112 /// Id to use for the next layer that is created, this counter is shared among all displays.
116 u64 next_layer_id = 1; 113 u64 next_layer_id = 1;
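Moving Lock() into the private section goes hand in hand with the vi.cpp change further down, which drops its explicit nv_flinger.Lock() call: now that FindBufferQueue and the other lookups lock internally, an outer lock on the same non-recursive mutex would deadlock. Returning std::unique_lock by value is what makes the `const auto guard = Lock();` idiom work, since lock ownership moves out to the caller's scope. A minimal sketch of the idiom, mirroring the shared mutex member implied by `*guard` (the member layout here is an assumption):

#include <memory>
#include <mutex>

class Flinger {
public:
    void CloseLayer() {
        const auto guard = Lock(); // held until guard goes out of scope
        // ... mutate displays / buffer_queues ...
    }

private:
    [[nodiscard]] std::unique_lock<std::mutex> Lock() const {
        // unique_lock is movable, so ownership transfers to the caller.
        return std::unique_lock{*guard};
    }

    std::shared_ptr<std::mutex> guard = std::make_shared<std::mutex>();
};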
diff --git a/src/core/hle/service/pcie/pcie.cpp b/src/core/hle/service/pcie/pcie.cpp
index 80c0fc7ac..f6686fc4d 100644
--- a/src/core/hle/service/pcie/pcie.cpp
+++ b/src/core/hle/service/pcie/pcie.cpp
@@ -48,7 +48,7 @@ public:
48 48
49class PCIe final : public ServiceFramework<PCIe> { 49class PCIe final : public ServiceFramework<PCIe> {
50public: 50public:
51 explicit PCIe(Core::System& system_) : ServiceFramework{system, "pcie"} { 51 explicit PCIe(Core::System& system_) : ServiceFramework{system_, "pcie"} {
52 // clang-format off 52 // clang-format off
53 static const FunctionInfo functions[] = { 53 static const FunctionInfo functions[] = {
54 {0, nullptr, "RegisterClassDriver"}, 54 {0, nullptr, "RegisterClassDriver"},
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index abf3d1ea3..ff2a5b1db 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -95,9 +95,14 @@ ServiceFrameworkBase::ServiceFrameworkBase(Core::System& system_, const char* se
95 : system{system_}, service_name{service_name_}, max_sessions{max_sessions_}, 95 : system{system_}, service_name{service_name_}, max_sessions{max_sessions_},
96 handler_invoker{handler_invoker_} {} 96 handler_invoker{handler_invoker_} {}
97 97
98ServiceFrameworkBase::~ServiceFrameworkBase() = default; 98ServiceFrameworkBase::~ServiceFrameworkBase() {
99 // Wait for other threads to release access before destroying
100 const auto guard = LockService();
101}
99 102
100void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) { 103void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager) {
104 const auto guard = LockService();
105
101 ASSERT(!port_installed); 106 ASSERT(!port_installed);
102 107
103 auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap(); 108 auto port = service_manager.RegisterService(service_name, max_sessions).Unwrap();
@@ -106,6 +111,8 @@ void ServiceFrameworkBase::InstallAsService(SM::ServiceManager& service_manager)
106} 111}
107 112
108void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) { 113void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) {
114 const auto guard = LockService();
115
109 ASSERT(!port_installed); 116 ASSERT(!port_installed);
110 117
111 auto [server_port, client_port] = 118 auto [server_port, client_port] =
@@ -115,17 +122,6 @@ void ServiceFrameworkBase::InstallAsNamedPort(Kernel::KernelCore& kernel) {
115 port_installed = true; 122 port_installed = true;
116} 123}
117 124
118std::shared_ptr<Kernel::ClientPort> ServiceFrameworkBase::CreatePort(Kernel::KernelCore& kernel) {
119 ASSERT(!port_installed);
120
121 auto [server_port, client_port] =
122 Kernel::ServerPort::CreatePortPair(kernel, max_sessions, service_name);
123 auto port = MakeResult(std::move(server_port)).Unwrap();
124 port->SetHleHandler(shared_from_this());
125 port_installed = true;
126 return client_port;
127}
128
129void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) { 125void ServiceFrameworkBase::RegisterHandlersBase(const FunctionInfoBase* functions, std::size_t n) {
130 handlers.reserve(handlers.size() + n); 126 handlers.reserve(handlers.size() + n);
131 for (std::size_t i = 0; i < n; ++i) { 127 for (std::size_t i = 0; i < n; ++i) {
@@ -164,6 +160,8 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) {
164} 160}
165 161
166ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& context) { 162ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& context) {
163 const auto guard = LockService();
164
167 switch (context.GetCommandType()) { 165 switch (context.GetCommandType()) {
168 case IPC::CommandType::Close: { 166 case IPC::CommandType::Close: {
169 IPC::ResponseBuilder rb{context, 2}; 167 IPC::ResponseBuilder rb{context, 2};
@@ -184,7 +182,11 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
184 UNIMPLEMENTED_MSG("command_type={}", context.GetCommandType()); 182 UNIMPLEMENTED_MSG("command_type={}", context.GetCommandType());
185 } 183 }
186 184
187 context.WriteToOutgoingCommandBuffer(context.GetThread()); 185 // If emulation has been shut down, we are closing service threads; do not write the
186 // response back to memory that may be shutting down as well.
187 if (system.IsPoweredOn()) {
188 context.WriteToOutgoingCommandBuffer(context.GetThread());
189 }
188 190
189 return RESULT_SUCCESS; 191 return RESULT_SUCCESS;
190} 192}
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 62a182310..916445517 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -5,9 +5,11 @@
5#pragma once 5#pragma once
6 6
7#include <cstddef> 7#include <cstddef>
8#include <mutex>
8#include <string> 9#include <string>
9#include <boost/container/flat_map.hpp> 10#include <boost/container/flat_map.hpp>
10#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/spin_lock.h"
11#include "core/hle/kernel/hle_ipc.h" 13#include "core/hle/kernel/hle_ipc.h"
12#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
13 15
@@ -68,11 +70,9 @@ public:
68 void InstallAsService(SM::ServiceManager& service_manager); 70 void InstallAsService(SM::ServiceManager& service_manager);
69 /// Creates a port pair and registers it on the kernel's global port registry. 71 /// Creates a port pair and registers it on the kernel's global port registry.
70 void InstallAsNamedPort(Kernel::KernelCore& kernel); 72 void InstallAsNamedPort(Kernel::KernelCore& kernel);
71 /// Creates and returns an unregistered port for the service. 73 /// Invokes a service request routine.
72 std::shared_ptr<Kernel::ClientPort> CreatePort(Kernel::KernelCore& kernel);
73
74 void InvokeRequest(Kernel::HLERequestContext& ctx); 74 void InvokeRequest(Kernel::HLERequestContext& ctx);
75 75 /// Handles a synchronization request for the service.
76 ResultCode HandleSyncRequest(Kernel::HLERequestContext& context) override; 76 ResultCode HandleSyncRequest(Kernel::HLERequestContext& context) override;
77 77
78protected: 78protected:
@@ -80,6 +80,11 @@ protected:
80 template <typename Self> 80 template <typename Self>
81 using HandlerFnP = void (Self::*)(Kernel::HLERequestContext&); 81 using HandlerFnP = void (Self::*)(Kernel::HLERequestContext&);
82 82
 83 /// Used to gain exclusive access to the service members, e.g. from the CoreTiming thread.
84 [[nodiscard]] std::scoped_lock<Common::SpinLock> LockService() {
85 return std::scoped_lock{lock_service};
86 }
87
83 /// System context that the service operates under. 88 /// System context that the service operates under.
84 Core::System& system; 89 Core::System& system;
85 90
@@ -115,6 +120,9 @@ private:
115 /// Function used to safely up-cast pointers to the derived class before invoking a handler. 120 /// Function used to safely up-cast pointers to the derived class before invoking a handler.
116 InvokerFn* handler_invoker; 121 InvokerFn* handler_invoker;
117 boost::container::flat_map<u32, FunctionInfoBase> handlers; 122 boost::container::flat_map<u32, FunctionInfoBase> handlers;
123
 124 /// Used to gain exclusive access to the service members, e.g. from the CoreTiming thread.
125 Common::SpinLock lock_service;
118}; 126};
119 127
120/** 128/**
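LockService() hands back a std::scoped_lock<Common::SpinLock>, which only compiles because scoped_lock is neither copyable nor movable: returning it relies on C++17 guaranteed copy elision, and Common::SpinLock must model BasicLockable (lock/unlock). A minimal sketch of such a spin lock, as an assumption of roughly what common/spin_lock.h provides rather than its exact contents:

#include <atomic>

namespace Common {

class SpinLock {
public:
    void lock() {
        // Spin until the flag is acquired; a production version would add a
        // pause/yield hint inside the loop.
        while (flag.test_and_set(std::memory_order_acquire)) {
        }
    }

    void unlock() {
        flag.clear(std::memory_order_release);
    }

private:
    std::atomic_flag flag = ATOMIC_FLAG_INIT;
};

} // namespace Common

This also explains the destructor change in service.cpp above: `const auto guard = LockService();` in ~ServiceFrameworkBase() simply blocks until any service thread still inside HandleSyncRequest() releases the lock.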
diff --git a/src/core/hle/service/sockets/blocking_worker.h b/src/core/hle/service/sockets/blocking_worker.h
deleted file mode 100644
index 2d53e52b6..000000000
--- a/src/core/hle/service/sockets/blocking_worker.h
+++ /dev/null
@@ -1,161 +0,0 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <memory>
9#include <string>
10#include <string_view>
11#include <thread>
12#include <variant>
13#include <vector>
14
15#include <fmt/format.h>
16
17#include "common/assert.h"
18#include "common/microprofile.h"
19#include "common/thread.h"
20#include "core/core.h"
21#include "core/hle/kernel/hle_ipc.h"
22#include "core/hle/kernel/kernel.h"
23#include "core/hle/kernel/thread.h"
24#include "core/hle/kernel/writable_event.h"
25
26namespace Service::Sockets {
27
28/**
 29 * Worker abstraction to execute blocking calls on the host without blocking the guest thread
30 *
31 * @tparam Service Service where the work is executed
32 * @tparam Types Types of work to execute
33 */
34template <class Service, class... Types>
35class BlockingWorker {
36 using This = BlockingWorker<Service, Types...>;
37 using WorkVariant = std::variant<std::monostate, Types...>;
38
39public:
40 /// Create a new worker
41 static std::unique_ptr<This> Create(Core::System& system, Service* service,
42 std::string_view name) {
43 return std::unique_ptr<This>(new This(system, service, name));
44 }
45
46 ~BlockingWorker() {
47 while (!is_available.load(std::memory_order_relaxed)) {
48 // Busy wait until work is finished
49 std::this_thread::yield();
50 }
51 // Monostate means to exit the thread
52 work = std::monostate{};
53 work_event.Set();
54 thread.join();
55 }
56
57 /**
 57 * Try to capture the worker so that work can be sent to it afterwards
59 * @returns True when the worker has been successfully captured
60 */
61 bool TryCapture() {
62 bool expected = true;
63 return is_available.compare_exchange_weak(expected, false, std::memory_order_relaxed,
64 std::memory_order_relaxed);
65 }
66
67 /**
68 * Send work to this worker abstraction
69 * @see TryCapture must be called before attempting to call this function
70 */
71 template <class Work>
72 void SendWork(Work new_work) {
73 ASSERT_MSG(!is_available, "Trying to send work on a worker that's not captured");
74 work = std::move(new_work);
75 work_event.Set();
76 }
77
78 /// Generate a callback for @see SleepClientThread
79 template <class Work>
80 auto Callback() {
81 return [this](std::shared_ptr<Kernel::Thread>, Kernel::HLERequestContext& ctx,
82 Kernel::ThreadWakeupReason reason) {
83 ASSERT(reason == Kernel::ThreadWakeupReason::Signal);
84 std::get<Work>(work).Response(ctx);
85 is_available.store(true);
86 };
87 }
88
89 /// Get kernel event that will be signalled by the worker when the host operation finishes
90 std::shared_ptr<Kernel::WritableEvent> KernelEvent() const {
91 return kernel_event;
92 }
93
94private:
95 explicit BlockingWorker(Core::System& system, Service* service, std::string_view name) {
96 auto pair = Kernel::WritableEvent::CreateEventPair(system.Kernel(), std::string(name));
97 kernel_event = std::move(pair.writable);
98 thread = std::thread([this, &system, service, name] { Run(system, service, name); });
99 }
100
101 void Run(Core::System& system, Service* service, std::string_view name) {
102 system.RegisterHostThread();
103
104 const std::string thread_name = fmt::format("yuzu:{}", name);
105 MicroProfileOnThreadCreate(thread_name.c_str());
106 Common::SetCurrentThreadName(thread_name.c_str());
107
108 bool keep_running = true;
109 while (keep_running) {
110 work_event.Wait();
111
112 const auto visit_fn = [service, &keep_running]<typename T>(T&& w) {
113 if constexpr (std::is_same_v<std::decay_t<T>, std::monostate>) {
114 keep_running = false;
115 } else {
116 w.Execute(service);
117 }
118 };
119 std::visit(visit_fn, work);
120
121 kernel_event->Signal();
122 }
123 }
124
125 std::thread thread;
126 WorkVariant work;
127 Common::Event work_event;
128 std::shared_ptr<Kernel::WritableEvent> kernel_event;
129 std::atomic_bool is_available{true};
130};
131
132template <class Service, class... Types>
133class BlockingWorkerPool {
134 using Worker = BlockingWorker<Service, Types...>;
135
136public:
137 explicit BlockingWorkerPool(Core::System& system_, Service* service_)
138 : system{system_}, service{service_} {}
139
140 /// Returns a captured worker thread, creating new ones if necessary
141 Worker* CaptureWorker() {
142 for (auto& worker : workers) {
143 if (worker->TryCapture()) {
144 return worker.get();
145 }
146 }
147 auto new_worker = Worker::Create(system, service, fmt::format("BSD:{}", workers.size()));
148 [[maybe_unused]] const bool success = new_worker->TryCapture();
149 ASSERT(success);
150
151 return workers.emplace_back(std::move(new_worker)).get();
152 }
153
154private:
155 Core::System& system;
156 Service* const service;
157
158 std::vector<std::unique_ptr<Worker>> workers;
159};
160
161} // namespace Service::Sockets
diff --git a/src/core/hle/service/sockets/bsd.cpp b/src/core/hle/service/sockets/bsd.cpp
index 67b419503..2b824059d 100644
--- a/src/core/hle/service/sockets/bsd.cpp
+++ b/src/core/hle/service/sockets/bsd.cpp
@@ -178,13 +178,12 @@ void BSD::Poll(Kernel::HLERequestContext& ctx) {
178 178
179 LOG_DEBUG(Service, "called. nfds={} timeout={}", nfds, timeout); 179 LOG_DEBUG(Service, "called. nfds={} timeout={}", nfds, timeout);
180 180
181 ExecuteWork(ctx, "BSD:Poll", timeout != 0, 181 ExecuteWork(ctx, PollWork{
182 PollWork{ 182 .nfds = nfds,
183 .nfds = nfds, 183 .timeout = timeout,
184 .timeout = timeout, 184 .read_buffer = ctx.ReadBuffer(),
185 .read_buffer = ctx.ReadBuffer(), 185 .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
186 .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), 186 });
187 });
188} 187}
189 188
190void BSD::Accept(Kernel::HLERequestContext& ctx) { 189void BSD::Accept(Kernel::HLERequestContext& ctx) {
@@ -193,11 +192,10 @@ void BSD::Accept(Kernel::HLERequestContext& ctx) {
193 192
194 LOG_DEBUG(Service, "called. fd={}", fd); 193 LOG_DEBUG(Service, "called. fd={}", fd);
195 194
196 ExecuteWork(ctx, "BSD:Accept", IsBlockingSocket(fd), 195 ExecuteWork(ctx, AcceptWork{
197 AcceptWork{ 196 .fd = fd,
198 .fd = fd, 197 .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()),
199 .write_buffer = std::vector<u8>(ctx.GetWriteBufferSize()), 198 });
200 });
201} 199}
202 200
203void BSD::Bind(Kernel::HLERequestContext& ctx) { 201void BSD::Bind(Kernel::HLERequestContext& ctx) {
@@ -215,11 +213,10 @@ void BSD::Connect(Kernel::HLERequestContext& ctx) {
215 213
216 LOG_DEBUG(Service, "called. fd={} addrlen={}", fd, ctx.GetReadBufferSize()); 214 LOG_DEBUG(Service, "called. fd={} addrlen={}", fd, ctx.GetReadBufferSize());
217 215
218 ExecuteWork(ctx, "BSD:Connect", IsBlockingSocket(fd), 216 ExecuteWork(ctx, ConnectWork{
219 ConnectWork{ 217 .fd = fd,
220 .fd = fd, 218 .addr = ctx.ReadBuffer(),
221 .addr = ctx.ReadBuffer(), 219 });
222 });
223} 220}
224 221
225void BSD::GetPeerName(Kernel::HLERequestContext& ctx) { 222void BSD::GetPeerName(Kernel::HLERequestContext& ctx) {
@@ -327,12 +324,11 @@ void BSD::Recv(Kernel::HLERequestContext& ctx) {
327 324
328 LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetWriteBufferSize()); 325 LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetWriteBufferSize());
329 326
330 ExecuteWork(ctx, "BSD:Recv", IsBlockingSocket(fd), 327 ExecuteWork(ctx, RecvWork{
331 RecvWork{ 328 .fd = fd,
332 .fd = fd, 329 .flags = flags,
333 .flags = flags, 330 .message = std::vector<u8>(ctx.GetWriteBufferSize()),
334 .message = std::vector<u8>(ctx.GetWriteBufferSize()), 331 });
335 });
336} 332}
337 333
338void BSD::RecvFrom(Kernel::HLERequestContext& ctx) { 334void BSD::RecvFrom(Kernel::HLERequestContext& ctx) {
@@ -344,13 +340,12 @@ void BSD::RecvFrom(Kernel::HLERequestContext& ctx) {
344 LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={} addrlen={}", fd, flags, 340 LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={} addrlen={}", fd, flags,
345 ctx.GetWriteBufferSize(0), ctx.GetWriteBufferSize(1)); 341 ctx.GetWriteBufferSize(0), ctx.GetWriteBufferSize(1));
346 342
347 ExecuteWork(ctx, "BSD:RecvFrom", IsBlockingSocket(fd), 343 ExecuteWork(ctx, RecvFromWork{
348 RecvFromWork{ 344 .fd = fd,
349 .fd = fd, 345 .flags = flags,
350 .flags = flags, 346 .message = std::vector<u8>(ctx.GetWriteBufferSize(0)),
351 .message = std::vector<u8>(ctx.GetWriteBufferSize(0)), 347 .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)),
352 .addr = std::vector<u8>(ctx.GetWriteBufferSize(1)), 348 });
353 });
354} 349}
355 350
356void BSD::Send(Kernel::HLERequestContext& ctx) { 351void BSD::Send(Kernel::HLERequestContext& ctx) {
@@ -361,12 +356,11 @@ void BSD::Send(Kernel::HLERequestContext& ctx) {
361 356
362 LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetReadBufferSize()); 357 LOG_DEBUG(Service, "called. fd={} flags=0x{:x} len={}", fd, flags, ctx.GetReadBufferSize());
363 358
364 ExecuteWork(ctx, "BSD:Send", IsBlockingSocket(fd), 359 ExecuteWork(ctx, SendWork{
365 SendWork{ 360 .fd = fd,
366 .fd = fd, 361 .flags = flags,
367 .flags = flags, 362 .message = ctx.ReadBuffer(),
368 .message = ctx.ReadBuffer(), 363 });
369 });
370} 364}
371 365
372void BSD::SendTo(Kernel::HLERequestContext& ctx) { 366void BSD::SendTo(Kernel::HLERequestContext& ctx) {
@@ -377,13 +371,12 @@ void BSD::SendTo(Kernel::HLERequestContext& ctx) {
377 LOG_DEBUG(Service, "called. fd={} flags=0x{} len={} addrlen={}", fd, flags, 371 LOG_DEBUG(Service, "called. fd={} flags=0x{} len={} addrlen={}", fd, flags,
378 ctx.GetReadBufferSize(0), ctx.GetReadBufferSize(1)); 372 ctx.GetReadBufferSize(0), ctx.GetReadBufferSize(1));
379 373
380 ExecuteWork(ctx, "BSD:SendTo", IsBlockingSocket(fd), 374 ExecuteWork(ctx, SendToWork{
381 SendToWork{ 375 .fd = fd,
382 .fd = fd, 376 .flags = flags,
383 .flags = flags, 377 .message = ctx.ReadBuffer(0),
384 .message = ctx.ReadBuffer(0), 378 .addr = ctx.ReadBuffer(1),
385 .addr = ctx.ReadBuffer(1), 379 });
386 });
387} 380}
388 381
389void BSD::Write(Kernel::HLERequestContext& ctx) { 382void BSD::Write(Kernel::HLERequestContext& ctx) {
@@ -392,12 +385,11 @@ void BSD::Write(Kernel::HLERequestContext& ctx) {
392 385
393 LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize()); 386 LOG_DEBUG(Service, "called. fd={} len={}", fd, ctx.GetReadBufferSize());
394 387
395 ExecuteWork(ctx, "BSD:Write", IsBlockingSocket(fd), 388 ExecuteWork(ctx, SendWork{
396 SendWork{ 389 .fd = fd,
397 .fd = fd, 390 .flags = 0,
398 .flags = 0, 391 .message = ctx.ReadBuffer(),
399 .message = ctx.ReadBuffer(), 392 });
400 });
401} 393}
402 394
403void BSD::Close(Kernel::HLERequestContext& ctx) { 395void BSD::Close(Kernel::HLERequestContext& ctx) {
@@ -410,24 +402,9 @@ void BSD::Close(Kernel::HLERequestContext& ctx) {
410} 402}
411 403
412template <typename Work> 404template <typename Work>
413void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, 405void BSD::ExecuteWork(Kernel::HLERequestContext& ctx, Work work) {
414 bool is_blocking, Work work) { 406 work.Execute(this);
415 if (!is_blocking) {
416 work.Execute(this);
417 work.Response(ctx);
418 return;
419 }
420
421 // Signal a dummy response to make IPC validation happy
422 // This will be overwritten by the SleepClientThread callback
423 work.Response(ctx); 407 work.Response(ctx);
424
425 auto worker = worker_pool.CaptureWorker();
426
427 ctx.SleepClientThread(std::string(sleep_reason), std::numeric_limits<u64>::max(),
428 worker->Callback<Work>(), worker->KernelEvent());
429
430 worker->SendWork(std::move(work));
431} 408}
432 409
433std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) { 410std::pair<s32, Errno> BSD::SocketImpl(Domain domain, Type type, Protocol protocol) {
@@ -807,18 +784,6 @@ bool BSD::IsFileDescriptorValid(s32 fd) const noexcept {
807 return true; 784 return true;
808} 785}
809 786
810bool BSD::IsBlockingSocket(s32 fd) const noexcept {
811 // Report invalid sockets as non-blocking
812 // This way we avoid using a worker thread, as the call will fail without blocking the host
813 if (fd > static_cast<s32>(MAX_FD) || fd < 0) {
814 return false;
815 }
816 if (!file_descriptors[fd]) {
817 return false;
818 }
819 return (file_descriptors[fd]->flags & FLAG_O_NONBLOCK) != 0;
820}
821
822void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept { 787void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept {
823 IPC::ResponseBuilder rb{ctx, 4}; 788 IPC::ResponseBuilder rb{ctx, 4};
824 789
@@ -827,8 +792,7 @@ void BSD::BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) co
827 rb.PushEnum(bsd_errno); 792 rb.PushEnum(bsd_errno);
828} 793}
829 794
830BSD::BSD(Core::System& system_, const char* name) 795BSD::BSD(Core::System& system_, const char* name) : ServiceFramework{system_, name} {
831 : ServiceFramework{system_, name}, worker_pool{system_, this} {
832 // clang-format off 796 // clang-format off
833 static const FunctionInfo functions[] = { 797 static const FunctionInfo functions[] = {
834 {0, &BSD::RegisterClient, "RegisterClient"}, 798 {0, &BSD::RegisterClient, "RegisterClient"},
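With blocking_worker.h gone, ExecuteWork runs every Work object synchronously on the calling thread; the per-session service threads introduced elsewhere in this changeset make that affordable, since a blocking socket call now stalls only its own service thread. The Work types just need a uniform Execute/Response pair. A self-contained sketch of the shape ExecuteWork assumes, with field names patterned on the Poll call site above but the declarations themselves being an assumption:

#include <cstdint>
#include <vector>

using s32 = std::int32_t;
using u8 = std::uint8_t;

struct Context {}; // stand-in for Kernel::HLERequestContext
struct Service;    // stand-in for BSD

// Shape assumed by ExecuteWork; not yuzu's exact declarations.
struct PollWork {
    void Execute(Service* service) {} // would perform the host socket call
    void Response(Context& ctx) {}    // would write the IPC reply

    s32 nfds{};
    s32 timeout{};
    std::vector<u8> read_buffer;
    std::vector<u8> write_buffer;
};

template <typename Work>
void ExecuteWork(Context& ctx, Service* self, Work work) {
    work.Execute(self); // synchronous: may block this service thread only
    work.Response(ctx);
}

Note that the designated initializers at the call sites require these members to be declared in initialization order.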
diff --git a/src/core/hle/service/sockets/bsd.h b/src/core/hle/service/sockets/bsd.h
index f14713fc4..6da0bfeb2 100644
--- a/src/core/hle/service/sockets/bsd.h
+++ b/src/core/hle/service/sockets/bsd.h
@@ -11,7 +11,6 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/kernel/hle_ipc.h" 12#include "core/hle/kernel/hle_ipc.h"
13#include "core/hle/service/service.h" 13#include "core/hle/service/service.h"
14#include "core/hle/service/sockets/blocking_worker.h"
15#include "core/hle/service/sockets/sockets.h" 14#include "core/hle/service/sockets/sockets.h"
16 15
17namespace Core { 16namespace Core {
@@ -138,8 +137,7 @@ private:
138 void Close(Kernel::HLERequestContext& ctx); 137 void Close(Kernel::HLERequestContext& ctx);
139 138
140 template <typename Work> 139 template <typename Work>
141 void ExecuteWork(Kernel::HLERequestContext& ctx, std::string_view sleep_reason, 140 void ExecuteWork(Kernel::HLERequestContext& ctx, Work work);
142 bool is_blocking, Work work);
143 141
144 std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol); 142 std::pair<s32, Errno> SocketImpl(Domain domain, Type type, Protocol protocol);
145 std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer, 143 std::pair<s32, Errno> PollImpl(std::vector<u8>& write_buffer, std::vector<u8> read_buffer,
@@ -163,15 +161,10 @@ private:
163 161
164 s32 FindFreeFileDescriptorHandle() noexcept; 162 s32 FindFreeFileDescriptorHandle() noexcept;
165 bool IsFileDescriptorValid(s32 fd) const noexcept; 163 bool IsFileDescriptorValid(s32 fd) const noexcept;
166 bool IsBlockingSocket(s32 fd) const noexcept;
167 164
168 void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept; 165 void BuildErrnoResponse(Kernel::HLERequestContext& ctx, Errno bsd_errno) const noexcept;
169 166
170 std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors; 167 std::array<std::optional<FileDescriptor>, MAX_FD> file_descriptors;
171
172 BlockingWorkerPool<BSD, PollWork, AcceptWork, ConnectWork, RecvWork, RecvFromWork, SendWork,
173 SendToWork>
174 worker_pool;
175}; 168};
176 169
177class BSDCFG final : public ServiceFramework<BSDCFG> { 170class BSDCFG final : public ServiceFramework<BSDCFG> {
diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp
index c822d21b8..ca61d72ca 100644
--- a/src/core/hle/service/sockets/sockets_translate.cpp
+++ b/src/core/hle/service/sockets/sockets_translate.cpp
@@ -64,6 +64,7 @@ Network::Type Translate(Type type) {
64 return Network::Type::DGRAM; 64 return Network::Type::DGRAM;
65 default: 65 default:
66 UNIMPLEMENTED_MSG("Unimplemented type={}", type); 66 UNIMPLEMENTED_MSG("Unimplemented type={}", type);
67 return Network::Type{};
67 } 68 }
68} 69}
69 70
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 45cfffe06..968cd16b6 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -536,8 +536,7 @@ private:
536 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 536 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
537 transaction, flags); 537 transaction, flags);
538 538
539 const auto guard = nv_flinger.Lock(); 539 auto& buffer_queue = *nv_flinger.FindBufferQueue(id);
540 auto& buffer_queue = nv_flinger.FindBufferQueue(id);
541 540
542 switch (transaction) { 541 switch (transaction) {
543 case TransactionId::Connect: { 542 case TransactionId::Connect: {
@@ -547,6 +546,9 @@ private:
547 Settings::values.resolution_factor.GetValue()), 546 Settings::values.resolution_factor.GetValue()),
548 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) * 547 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
549 Settings::values.resolution_factor.GetValue())}; 548 Settings::values.resolution_factor.GetValue())};
549
550 buffer_queue.Connect();
551
550 ctx.WriteBuffer(response.Serialize()); 552 ctx.WriteBuffer(response.Serialize());
551 break; 553 break;
552 } 554 }
@@ -563,40 +565,25 @@ private:
563 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 565 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
564 const u32 width{request.data.width}; 566 const u32 width{request.data.width};
565 const u32 height{request.data.height}; 567 const u32 height{request.data.height};
566 auto result = buffer_queue.DequeueBuffer(width, height); 568
567 569 do {
568 if (result) { 570 if (auto result = buffer_queue.DequeueBuffer(width, height); result) {
569 // Buffer is available 571 // Buffer is available
570 IGBPDequeueBufferResponseParcel response{result->first, *result->second}; 572 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
571 ctx.WriteBuffer(response.Serialize()); 573 ctx.WriteBuffer(response.Serialize());
572 } else { 574 break;
573 // Wait the current thread until a buffer becomes available 575 }
574 ctx.SleepClientThread( 576 } while (buffer_queue.IsConnected());
575 "IHOSBinderDriver::DequeueBuffer", UINT64_MAX, 577
576 [=, this](std::shared_ptr<Kernel::Thread> thread,
577 Kernel::HLERequestContext& ctx, Kernel::ThreadWakeupReason reason) {
578 // Repeat TransactParcel DequeueBuffer when a buffer is available
579 const auto guard = nv_flinger.Lock();
580 auto& buffer_queue = nv_flinger.FindBufferQueue(id);
581 auto result = buffer_queue.DequeueBuffer(width, height);
582 ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
583
584 IGBPDequeueBufferResponseParcel response{result->first, *result->second};
585 ctx.WriteBuffer(response.Serialize());
586 IPC::ResponseBuilder rb{ctx, 2};
587 rb.Push(RESULT_SUCCESS);
588 },
589 buffer_queue.GetWritableBufferWaitEvent());
590 }
591 break; 578 break;
592 } 579 }
593 case TransactionId::RequestBuffer: { 580 case TransactionId::RequestBuffer: {
594 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 581 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
595 582
596 auto& buffer = buffer_queue.RequestBuffer(request.slot); 583 auto& buffer = buffer_queue.RequestBuffer(request.slot);
597
598 IGBPRequestBufferResponseParcel response{buffer}; 584 IGBPRequestBufferResponseParcel response{buffer};
599 ctx.WriteBuffer(response.Serialize()); 585 ctx.WriteBuffer(response.Serialize());
586
600 break; 587 break;
601 } 588 }
602 case TransactionId::QueueBuffer: { 589 case TransactionId::QueueBuffer: {
@@ -682,7 +669,7 @@ private:
682 669
683 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); 670 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
684 671
685 const auto& buffer_queue = nv_flinger.FindBufferQueue(id); 672 const auto& buffer_queue = *nv_flinger.FindBufferQueue(id);
686 673
687 // TODO(Subv): Find out what this actually is. 674 // TODO(Subv): Find out what this actually is.
688 IPC::ResponseBuilder rb{ctx, 2, 1}; 675 IPC::ResponseBuilder rb{ctx, 2, 1};
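The DequeueBuffer transaction no longer parks the guest thread with SleepClientThread; the blocking now happens inside BufferQueue::DequeueBuffer (on the condition variable added earlier in this diff), and the do/while above retries until a buffer arrives or the queue disconnects. A condensed sketch of that control flow, with stand-in types for the parts not shown here:

#include <optional>
#include <utility>

struct Buffer {};

// Stand-ins for the BufferQueue API used by the transaction above.
struct QueueView {
    std::optional<std::pair<unsigned, Buffer*>> DequeueBuffer(unsigned w, unsigned h);
    bool IsConnected() const;
};

void HandleDequeue(QueueView& queue, unsigned width, unsigned height) {
    do {
        if (auto result = queue.DequeueBuffer(width, height); result) {
            // serialize result->first / *result->second into the reply parcel
            break;
        }
        // DequeueBuffer came back empty: woken without a free slot (e.g. by
        // Disconnect()), so retry only while the queue is still connected.
    } while (queue.IsConnected());
}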
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index d91c15561..e4f5fd40c 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -185,6 +185,10 @@ constexpr std::array<const char*, 66> RESULT_MESSAGES{
185 "The INI file contains more than the maximum allowable number of KIP files.", 185 "The INI file contains more than the maximum allowable number of KIP files.",
186}; 186};
187 187
188std::string GetResultStatusString(ResultStatus status) {
189 return RESULT_MESSAGES.at(static_cast<std::size_t>(status));
190}
191
188std::ostream& operator<<(std::ostream& os, ResultStatus status) { 192std::ostream& operator<<(std::ostream& os, ResultStatus status) {
189 os << RESULT_MESSAGES.at(static_cast<std::size_t>(status)); 193 os << RESULT_MESSAGES.at(static_cast<std::size_t>(status));
190 return os; 194 return os;
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index 36e79e71d..b2e5b13de 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -135,6 +135,7 @@ enum class ResultStatus : u16 {
135 ErrorINITooManyKIPs, 135 ErrorINITooManyKIPs,
136}; 136};
137 137
138std::string GetResultStatusString(ResultStatus status);
138std::ostream& operator<<(std::ostream& os, ResultStatus status); 139std::ostream& operator<<(std::ostream& os, ResultStatus status);
139 140
140/// Interface for loading an application 141/// Interface for loading an application
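GetResultStatusString gives callers a plain std::string for a ResultStatus, so the message can be passed straight to fmt-style formatting without going through the ostream operator<<. A hypothetical call site (assuming fmt, which yuzu already uses; the status value is only illustrative):

#include <fmt/core.h>
#include "core/loader/loader.h"

void ReportLoadFailure(Loader::ResultStatus status) {
    // std::string works directly with fmt, unlike the ostream operator<<.
    fmt::print("Failed to load ROM: {}\n", Loader::GetResultStatusString(status));
}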
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 54a848936..11609682a 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -4,7 +4,6 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <cstring> 6#include <cstring>
7#include <mutex>
8#include <optional> 7#include <optional>
9#include <utility> 8#include <utility>
10 9
@@ -45,44 +44,16 @@ struct Memory::Impl {
45 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); 44 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
46 } 45 }
47 46
48 void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
49 Common::MemoryHookPointer mmio_handler) {
50 UNIMPLEMENTED();
51 }
52
53 void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { 47 void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
54 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 48 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
55 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 49 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
56 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); 50 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped);
57 } 51 }
58 52
59 void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
60 Common::MemoryHookPointer hook) {
61 UNIMPLEMENTED();
62 }
63
64 void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
65 Common::MemoryHookPointer hook) {
66 UNIMPLEMENTED();
67 }
68
69 bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const { 53 bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const {
70 const auto& page_table = process.PageTable().PageTableImpl(); 54 const auto& page_table = process.PageTable().PageTableImpl();
71 55 const auto [pointer, type] = page_table.pointers[vaddr >> PAGE_BITS].PointerType();
72 const u8* const page_pointer = page_table.pointers[vaddr >> PAGE_BITS]; 56 return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
73 if (page_pointer != nullptr) {
74 return true;
75 }
76
77 if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory) {
78 return true;
79 }
80
81 if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special) {
82 return false;
83 }
84
85 return false;
86 } 57 }
87 58
88 bool IsValidVirtualAddress(VAddr vaddr) const { 59 bool IsValidVirtualAddress(VAddr vaddr) const {
@@ -100,17 +71,15 @@ struct Memory::Impl {
100 } 71 }
101 72
102 u8* GetPointer(const VAddr vaddr) const { 73 u8* GetPointer(const VAddr vaddr) const {
103 u8* const page_pointer{current_page_table->pointers[vaddr >> PAGE_BITS]}; 74 const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
104 if (page_pointer) { 75 if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
105 return page_pointer + vaddr; 76 return pointer + vaddr;
106 } 77 }
107 78 const auto type = Common::PageTable::PageInfo::ExtractType(raw_pointer);
108 if (current_page_table->attributes[vaddr >> PAGE_BITS] == 79 if (type == Common::PageType::RasterizerCachedMemory) {
109 Common::PageType::RasterizerCachedMemory) {
110 return GetPointerFromRasterizerCachedMemory(vaddr); 80 return GetPointerFromRasterizerCachedMemory(vaddr);
111 } 81 }
112 82 return nullptr;
113 return {};
114 } 83 }
115 84
116 u8 Read8(const VAddr addr) { 85 u8 Read8(const VAddr addr) {
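Every hunk in this file rewrites the old pair of parallel arrays (pointers[] plus attributes[]) into a single Common::PageTable::PageInfo lookup, so a page's host pointer and its PageType come from one atomic word. The usual way to pack the two is to hide the type in the low alignment bits of the pointer; the sketch below assumes a 2-bit tag, which fits the Raw/Store/ExtractPointer/ExtractType interface used in these hunks but is not necessarily yuzu's exact bit layout:

#include <atomic>
#include <cstdint>
#include <utility>

using u8 = std::uint8_t;

enum class PageType : std::uintptr_t {
    Unmapped = 0,
    Memory = 1,
    RasterizerCachedMemory = 2,
};

class PageInfo {
public:
    static constexpr std::uintptr_t ATTRIBUTE_BITS = 2; // pointers are at least 4-byte aligned

    [[nodiscard]] std::uintptr_t Raw() const {
        return raw.load(std::memory_order_relaxed);
    }

    // One atomic read yields a mutually consistent pointer/type pair.
    [[nodiscard]] std::pair<u8*, PageType> PointerType() const {
        const std::uintptr_t r = Raw();
        return {ExtractPointer(r), ExtractType(r)};
    }

    void Store(u8* pointer, PageType type) {
        raw.store(reinterpret_cast<std::uintptr_t>(pointer) |
                  static_cast<std::uintptr_t>(type));
    }

    [[nodiscard]] static u8* ExtractPointer(std::uintptr_t r) {
        return reinterpret_cast<u8*>(r & (~std::uintptr_t{0} << ATTRIBUTE_BITS));
    }

    [[nodiscard]] static PageType ExtractType(std::uintptr_t r) {
        return static_cast<PageType>(r & ((std::uintptr_t{1} << ATTRIBUTE_BITS) - 1));
    }

private:
    std::atomic<std::uintptr_t> raw{};
};

Packing the type into the pointer is also what makes SafePageEntry and rasterizer_cache_guard removable below: a reader can no longer observe a pointer and an attribute torn across two different updates.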
@@ -222,7 +191,8 @@ struct Memory::Impl {
222 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); 191 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
223 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 192 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
224 193
225 switch (page_table.attributes[page_index]) { 194 const auto [pointer, type] = page_table.pointers[page_index].PointerType();
195 switch (type) {
226 case Common::PageType::Unmapped: { 196 case Common::PageType::Unmapped: {
227 LOG_ERROR(HW_Memory, 197 LOG_ERROR(HW_Memory,
228 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 198 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -231,10 +201,8 @@ struct Memory::Impl {
231 break; 201 break;
232 } 202 }
233 case Common::PageType::Memory: { 203 case Common::PageType::Memory: {
234 DEBUG_ASSERT(page_table.pointers[page_index]); 204 DEBUG_ASSERT(pointer);
235 205 const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
236 const u8* const src_ptr =
237 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
238 std::memcpy(dest_buffer, src_ptr, copy_amount); 206 std::memcpy(dest_buffer, src_ptr, copy_amount);
239 break; 207 break;
240 } 208 }
@@ -268,7 +236,8 @@ struct Memory::Impl {
268 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); 236 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
269 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 237 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
270 238
271 switch (page_table.attributes[page_index]) { 239 const auto [pointer, type] = page_table.pointers[page_index].PointerType();
240 switch (type) {
272 case Common::PageType::Unmapped: { 241 case Common::PageType::Unmapped: {
273 LOG_ERROR(HW_Memory, 242 LOG_ERROR(HW_Memory,
274 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 243 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -277,10 +246,8 @@ struct Memory::Impl {
277 break; 246 break;
278 } 247 }
279 case Common::PageType::Memory: { 248 case Common::PageType::Memory: {
280 DEBUG_ASSERT(page_table.pointers[page_index]); 249 DEBUG_ASSERT(pointer);
281 250 const u8* const src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
282 const u8* const src_ptr =
283 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
284 std::memcpy(dest_buffer, src_ptr, copy_amount); 251 std::memcpy(dest_buffer, src_ptr, copy_amount);
285 break; 252 break;
286 } 253 }
@@ -320,7 +287,8 @@ struct Memory::Impl {
320 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); 287 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
321 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 288 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
322 289
323 switch (page_table.attributes[page_index]) { 290 const auto [pointer, type] = page_table.pointers[page_index].PointerType();
291 switch (type) {
324 case Common::PageType::Unmapped: { 292 case Common::PageType::Unmapped: {
325 LOG_ERROR(HW_Memory, 293 LOG_ERROR(HW_Memory,
326 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 294 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -328,10 +296,8 @@ struct Memory::Impl {
328 break; 296 break;
329 } 297 }
330 case Common::PageType::Memory: { 298 case Common::PageType::Memory: {
331 DEBUG_ASSERT(page_table.pointers[page_index]); 299 DEBUG_ASSERT(pointer);
332 300 u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
333 u8* const dest_ptr =
334 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
335 std::memcpy(dest_ptr, src_buffer, copy_amount); 301 std::memcpy(dest_ptr, src_buffer, copy_amount);
336 break; 302 break;
337 } 303 }
@@ -364,7 +330,8 @@ struct Memory::Impl {
364 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); 330 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
365 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 331 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
366 332
367 switch (page_table.attributes[page_index]) { 333 const auto [pointer, type] = page_table.pointers[page_index].PointerType();
334 switch (type) {
368 case Common::PageType::Unmapped: { 335 case Common::PageType::Unmapped: {
369 LOG_ERROR(HW_Memory, 336 LOG_ERROR(HW_Memory,
370 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 337 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -372,10 +339,8 @@ struct Memory::Impl {
372 break; 339 break;
373 } 340 }
374 case Common::PageType::Memory: { 341 case Common::PageType::Memory: {
375 DEBUG_ASSERT(page_table.pointers[page_index]); 342 DEBUG_ASSERT(pointer);
376 343 u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
377 u8* const dest_ptr =
378 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
379 std::memcpy(dest_ptr, src_buffer, copy_amount); 344 std::memcpy(dest_ptr, src_buffer, copy_amount);
380 break; 345 break;
381 } 346 }
@@ -414,7 +379,8 @@ struct Memory::Impl {
414 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); 379 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
415 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 380 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
416 381
417 switch (page_table.attributes[page_index]) { 382 const auto [pointer, type] = page_table.pointers[page_index].PointerType();
383 switch (type) {
418 case Common::PageType::Unmapped: { 384 case Common::PageType::Unmapped: {
419 LOG_ERROR(HW_Memory, 385 LOG_ERROR(HW_Memory,
420 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 386 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -422,10 +388,8 @@ struct Memory::Impl {
422 break; 388 break;
423 } 389 }
424 case Common::PageType::Memory: { 390 case Common::PageType::Memory: {
425 DEBUG_ASSERT(page_table.pointers[page_index]); 391 DEBUG_ASSERT(pointer);
426 392 u8* const dest_ptr = pointer + page_offset + (page_index << PAGE_BITS);
427 u8* dest_ptr =
428 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
429 std::memset(dest_ptr, 0, copy_amount); 393 std::memset(dest_ptr, 0, copy_amount);
430 break; 394 break;
431 } 395 }
@@ -461,7 +425,8 @@ struct Memory::Impl {
461 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size); 425 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
462 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 426 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
463 427
464 switch (page_table.attributes[page_index]) { 428 const auto [pointer, type] = page_table.pointers[page_index].PointerType();
429 switch (type) {
465 case Common::PageType::Unmapped: { 430 case Common::PageType::Unmapped: {
466 LOG_ERROR(HW_Memory, 431 LOG_ERROR(HW_Memory,
467 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 432 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
@@ -470,9 +435,8 @@ struct Memory::Impl {
470 break; 435 break;
471 } 436 }
472 case Common::PageType::Memory: { 437 case Common::PageType::Memory: {
473 DEBUG_ASSERT(page_table.pointers[page_index]); 438 DEBUG_ASSERT(pointer);
474 const u8* src_ptr = 439 const u8* src_ptr = pointer + page_offset + (page_index << PAGE_BITS);
475 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
476 WriteBlock(process, dest_addr, src_ptr, copy_amount); 440 WriteBlock(process, dest_addr, src_ptr, copy_amount);
477 break; 441 break;
478 } 442 }
@@ -498,34 +462,19 @@ struct Memory::Impl {
498 return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size); 462 return CopyBlock(*system.CurrentProcess(), dest_addr, src_addr, size);
499 } 463 }
500 464
501 struct PageEntry {
502 u8* const pointer;
503 const Common::PageType attribute;
504 };
505
506 PageEntry SafePageEntry(std::size_t base) const {
507 std::lock_guard lock{rasterizer_cache_guard};
508 return {
509 .pointer = current_page_table->pointers[base],
510 .attribute = current_page_table->attributes[base],
511 };
512 }
513
514 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { 465 void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
515 std::lock_guard lock{rasterizer_cache_guard};
516 if (vaddr == 0) { 466 if (vaddr == 0) {
517 return; 467 return;
518 } 468 }
519
520 // Iterate over a contiguous CPU address space, which corresponds to the specified GPU 469 // Iterate over a contiguous CPU address space, which corresponds to the specified GPU
521 // address space, marking the region as un/cached. The region is marked un/cached at a 470 // address space, marking the region as un/cached. The region is marked un/cached at a
522 // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size 471 // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size
523 // is different). This assumes the specified GPU address region is contiguous as well. 472 // is different). This assumes the specified GPU address region is contiguous as well.
524 473
525 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; 474 const u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
526 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { 475 for (u64 i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
527 Common::PageType& page_type{current_page_table->attributes[vaddr >> PAGE_BITS]}; 476 const Common::PageType page_type{
528 477 current_page_table->pointers[vaddr >> PAGE_BITS].Type()};
529 if (cached) { 478 if (cached) {
530 // Switch page type to cached if now cached 479 // Switch page type to cached if now cached
531 switch (page_type) { 480 switch (page_type) {
@@ -534,8 +483,8 @@ struct Memory::Impl {
534 // space, for example, a system module need not have a VRAM mapping. 483 // space, for example, a system module need not have a VRAM mapping.
535 break; 484 break;
536 case Common::PageType::Memory: 485 case Common::PageType::Memory:
537 page_type = Common::PageType::RasterizerCachedMemory; 486 current_page_table->pointers[vaddr >> PAGE_BITS].Store(
538 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; 487 nullptr, Common::PageType::RasterizerCachedMemory);
539 break; 488 break;
540 case Common::PageType::RasterizerCachedMemory: 489 case Common::PageType::RasterizerCachedMemory:
541 // There can be more than one GPU region mapped per CPU region, so it's common 490 // There can be more than one GPU region mapped per CPU region, so it's common
@@ -556,16 +505,16 @@ struct Memory::Impl {
556 // that this area is already unmarked as cached. 505 // that this area is already unmarked as cached.
557 break; 506 break;
558 case Common::PageType::RasterizerCachedMemory: { 507 case Common::PageType::RasterizerCachedMemory: {
559 u8* pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~PAGE_MASK)}; 508 u8* const pointer{GetPointerFromRasterizerCachedMemory(vaddr & ~PAGE_MASK)};
560 if (pointer == nullptr) { 509 if (pointer == nullptr) {
561 // It's possible that this function has been called while updating the 510 // It's possible that this function has been called while updating the
562 // pagetable after unmapping a VMA. In that case the underlying VMA will no 511 // pagetable after unmapping a VMA. In that case the underlying VMA will no
563 // longer exist, and we should just leave the pagetable entry blank. 512 // longer exist, and we should just leave the pagetable entry blank.
564 page_type = Common::PageType::Unmapped; 513 current_page_table->pointers[vaddr >> PAGE_BITS].Store(
514 nullptr, Common::PageType::Unmapped);
565 } else { 515 } else {
566 current_page_table->pointers[vaddr >> PAGE_BITS] = 516 current_page_table->pointers[vaddr >> PAGE_BITS].Store(
567 pointer - (vaddr & ~PAGE_MASK); 517 pointer - (vaddr & ~PAGE_MASK), Common::PageType::Memory);
568 page_type = Common::PageType::Memory;
569 } 518 }
570 break; 519 break;
571 } 520 }
@@ -595,7 +544,7 @@ struct Memory::Impl {
595 auto& gpu = system.GPU(); 544 auto& gpu = system.GPU();
596 for (u64 i = 0; i < size; i++) { 545 for (u64 i = 0; i < size; i++) {
597 const auto page = base + i; 546 const auto page = base + i;
598 if (page_table.attributes[page] == Common::PageType::RasterizerCachedMemory) { 547 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
599 gpu.FlushAndInvalidateRegion(page << PAGE_BITS, PAGE_SIZE); 548 gpu.FlushAndInvalidateRegion(page << PAGE_BITS, PAGE_SIZE);
600 } 549 }
601 } 550 }
@@ -610,20 +559,18 @@ struct Memory::Impl {
610 "Mapping memory page without a pointer @ {:016x}", base * PAGE_SIZE); 559 "Mapping memory page without a pointer @ {:016x}", base * PAGE_SIZE);
611 560
612 while (base != end) { 561 while (base != end) {
613 page_table.attributes[base] = type; 562 page_table.pointers[base].Store(nullptr, type);
614 page_table.pointers[base] = nullptr;
615 page_table.backing_addr[base] = 0; 563 page_table.backing_addr[base] = 0;
616 564
617 base += 1; 565 base += 1;
618 } 566 }
619 } else { 567 } else {
620 while (base != end) { 568 while (base != end) {
621 page_table.pointers[base] = 569 page_table.pointers[base].Store(
622 system.DeviceMemory().GetPointer(target) - (base << PAGE_BITS); 570 system.DeviceMemory().GetPointer(target) - (base << PAGE_BITS), type);
623 page_table.attributes[base] = type;
624 page_table.backing_addr[base] = target - (base << PAGE_BITS); 571 page_table.backing_addr[base] = target - (base << PAGE_BITS);
625 572
626 ASSERT_MSG(page_table.pointers[base], 573 ASSERT_MSG(page_table.pointers[base].Pointer(),
627 "memory mapping base yield a nullptr within the table"); 574 "memory mapping base yield a nullptr within the table");
628 575
629 base += 1; 576 base += 1;
@@ -646,21 +593,13 @@ struct Memory::Impl {
646 template <typename T> 593 template <typename T>
647 T Read(const VAddr vaddr) { 594 T Read(const VAddr vaddr) {
648 // Avoid adding any extra logic to this fast-path block 595 // Avoid adding any extra logic to this fast-path block
649 if (const u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) { 596 const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
597 if (const u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
650 T value; 598 T value;
651 std::memcpy(&value, &pointer[vaddr], sizeof(T)); 599 std::memcpy(&value, &pointer[vaddr], sizeof(T));
652 return value; 600 return value;
653 } 601 }
654 602 switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
655 // Otherwise, we need to grab the page with a lock, in case it is currently being modified
656 const auto entry = SafePageEntry(vaddr >> PAGE_BITS);
657 if (entry.pointer) {
658 T value;
659 std::memcpy(&value, &entry.pointer[vaddr], sizeof(T));
660 return value;
661 }
662
663 switch (entry.attribute) {
664 case Common::PageType::Unmapped: 603 case Common::PageType::Unmapped:
665 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); 604 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
666 return 0; 605 return 0;
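
The rewritten fast path works because each page-table entry now packs the host pointer and the Common::PageType into a single word, so one relaxed load replaces the old separate pointer array, attribute array, and lock. A minimal sketch of such a packed entry, assuming the stored pointers are page-aligned so the low bits are free for the type tag (names mirror the diff, but the exact layout here is illustrative):

    #include <atomic>
    #include <cstdint>

    using u8 = std::uint8_t;

    enum class PageType : std::uintptr_t { Unmapped = 0, Memory = 1, RasterizerCachedMemory = 2 };

    struct PageInfo {
        // One relaxed load serves both the pointer and the type on the fast path.
        std::uintptr_t Raw() const noexcept {
            return raw.load(std::memory_order_relaxed);
        }
        // Page-aligned pointers leave the low two bits free for the type tag.
        static u8* ExtractPointer(std::uintptr_t v) noexcept {
            return reinterpret_cast<u8*>(v & ~std::uintptr_t{3});
        }
        static PageType ExtractType(std::uintptr_t v) noexcept {
            return static_cast<PageType>(v & std::uintptr_t{3});
        }
        // Writers publish pointer and type together, atomically.
        void Store(u8* pointer, PageType type) noexcept {
            raw.store(reinterpret_cast<std::uintptr_t>(pointer) | static_cast<std::uintptr_t>(type),
                      std::memory_order_relaxed);
        }

        std::atomic<std::uintptr_t> raw{0};
    };

In this sketch Unmapped encodes as zero, so a zero-filled table reads back as fully unmapped without extra initialization.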
@@ -692,20 +631,12 @@ struct Memory::Impl {
692 template <typename T> 631 template <typename T>
693 void Write(const VAddr vaddr, const T data) { 632 void Write(const VAddr vaddr, const T data) {
694 // Avoid adding any extra logic to this fast-path block 633 // Avoid adding any extra logic to this fast-path block
695 if (u8* const pointer = current_page_table->pointers[vaddr >> PAGE_BITS]) { 634 const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
635 if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
696 std::memcpy(&pointer[vaddr], &data, sizeof(T)); 636 std::memcpy(&pointer[vaddr], &data, sizeof(T));
697 return; 637 return;
698 } 638 }
699 639 switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
700 // Otherwise, we need to grab the page with a lock, in case it is currently being modified
701 const auto entry = SafePageEntry(vaddr >> PAGE_BITS);
702 if (entry.pointer) {
703 // Memory was mapped, we are done
704 std::memcpy(&entry.pointer[vaddr], &data, sizeof(T));
705 return;
706 }
707
708 switch (entry.attribute) {
709 case Common::PageType::Unmapped: 640 case Common::PageType::Unmapped:
710 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, 641 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
711 static_cast<u32>(data), vaddr); 642 static_cast<u32>(data), vaddr);
@@ -726,15 +657,13 @@ struct Memory::Impl {
726 657
727 template <typename T> 658 template <typename T>
728 bool WriteExclusive(const VAddr vaddr, const T data, const T expected) { 659 bool WriteExclusive(const VAddr vaddr, const T data, const T expected) {
729 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 660 const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
730 if (page_pointer != nullptr) { 661 if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
731 // NOTE: Avoid adding any extra logic to this fast-path block 662 // NOTE: Avoid adding any extra logic to this fast-path block
732 auto* pointer = reinterpret_cast<volatile T*>(&page_pointer[vaddr]); 663 const auto volatile_pointer = reinterpret_cast<volatile T*>(&pointer[vaddr]);
733 return Common::AtomicCompareAndSwap(pointer, data, expected); 664 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
734 } 665 }
735 666 switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
736 const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
737 switch (type) {
738 case Common::PageType::Unmapped: 667 case Common::PageType::Unmapped:
739 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, 668 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
740 static_cast<u32>(data), vaddr); 669 static_cast<u32>(data), vaddr);
@@ -755,15 +684,13 @@ struct Memory::Impl {
755 } 684 }
756 685
757 bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) { 686 bool WriteExclusive128(const VAddr vaddr, const u128 data, const u128 expected) {
758 u8* const page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 687 const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> PAGE_BITS].Raw();
759 if (page_pointer != nullptr) { 688 if (u8* const pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) {
760 // NOTE: Avoid adding any extra logic to this fast-path block 689 // NOTE: Avoid adding any extra logic to this fast-path block
761 auto* pointer = reinterpret_cast<volatile u64*>(&page_pointer[vaddr]); 690 const auto volatile_pointer = reinterpret_cast<volatile u64*>(&pointer[vaddr]);
762 return Common::AtomicCompareAndSwap(pointer, data, expected); 691 return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
763 } 692 }
764 693 switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) {
765 const Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
766 switch (type) {
767 case Common::PageType::Unmapped: 694 case Common::PageType::Unmapped:
768 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8, 695 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}{:016X}", sizeof(data) * 8,
769 static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr); 696 static_cast<u64>(data[1]), static_cast<u64>(data[0]), vaddr);
@@ -783,7 +710,6 @@ struct Memory::Impl {
783 return true; 710 return true;
784 } 711 }
785 712
786 mutable std::mutex rasterizer_cache_guard;
787 Common::PageTable* current_page_table = nullptr; 713 Common::PageTable* current_page_table = nullptr;
788 Core::System& system; 714 Core::System& system;
789}; 715};
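
With rasterizer_cache_guard removed, exclusive stores depend solely on a host compare-and-swap against the page's backing memory. A hedged sketch of what Common::AtomicCompareAndSwap is assumed to do for the scalar cases (the GCC/Clang builtin is used purely for illustration; the project's real helper may differ per compiler):

    template <typename T>
    [[nodiscard]] bool AtomicCompareAndSwap(volatile T* pointer, T value, T expected) {
        // Store `value` only if *pointer still holds `expected`, mirroring the
        // guest's load-exclusive/store-exclusive pair with a single host CAS.
        return __atomic_compare_exchange_n(pointer, &expected, value, false,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

The u128 variant above reinterprets the address as volatile u64*, presumably so the helper can issue a double-width CAS on that address for the 16-byte exchange.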
@@ -799,25 +725,10 @@ void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size
799 impl->MapMemoryRegion(page_table, base, size, target); 725 impl->MapMemoryRegion(page_table, base, size, target);
800} 726}
801 727
802void Memory::MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
803 Common::MemoryHookPointer mmio_handler) {
804 impl->MapIoRegion(page_table, base, size, std::move(mmio_handler));
805}
806
807void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { 728void Memory::UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
808 impl->UnmapRegion(page_table, base, size); 729 impl->UnmapRegion(page_table, base, size);
809} 730}
810 731
811void Memory::AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
812 Common::MemoryHookPointer hook) {
813 impl->AddDebugHook(page_table, base, size, std::move(hook));
814}
815
816void Memory::RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
817 Common::MemoryHookPointer hook) {
818 impl->RemoveDebugHook(page_table, base, size, std::move(hook));
819}
820
821bool Memory::IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const { 732bool Memory::IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) const {
822 return impl->IsValidVirtualAddress(process, vaddr); 733 return impl->IsValidVirtualAddress(process, vaddr);
823} 734}
diff --git a/src/core/memory.h b/src/core/memory.h
index 4a1cc63f4..705ebb23d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -8,7 +8,6 @@
8#include <memory> 8#include <memory>
9#include <string> 9#include <string>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/memory_hook.h"
12 11
13namespace Common { 12namespace Common {
14struct PageTable; 13struct PageTable;
@@ -78,17 +77,6 @@ public:
78 void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target); 77 void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target);
79 78
80 /** 79 /**
81 * Maps a region of the emulated process address space as a IO region.
82 *
83 * @param page_table The page table of the emulated process.
84 * @param base The address to start mapping at. Must be page-aligned.
85 * @param size The amount of bytes to map. Must be page-aligned.
86 * @param mmio_handler The handler that backs the mapping.
87 */
88 void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
89 Common::MemoryHookPointer mmio_handler);
90
91 /**
92 * Unmaps a region of the emulated process address space. 80 * Unmaps a region of the emulated process address space.
93 * 81 *
94 * @param page_table The page table of the emulated process. 82 * @param page_table The page table of the emulated process.
@@ -98,28 +86,6 @@ public:
98 void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size); 86 void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
99 87
100 /** 88 /**
101 * Adds a memory hook to intercept reads and writes to given region of memory.
102 *
103 * @param page_table The page table of the emulated process
104 * @param base The starting address to apply the hook to.
105 * @param size The size of the memory region to apply the hook to, in bytes.
106 * @param hook The hook to apply to the region of memory.
107 */
108 void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
109 Common::MemoryHookPointer hook);
110
111 /**
112 * Removes a memory hook from a given range of memory.
113 *
114 * @param page_table The page table of the emulated process.
115 * @param base The starting address to remove the hook from.
116 * @param size The size of the memory region to remove the hook from, in bytes.
117 * @param hook The hook to remove from the specified region of memory.
118 */
119 void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
120 Common::MemoryHookPointer hook);
121
122 /**
123 * Checks whether or not the supplied address is a valid virtual 89 * Checks whether or not the supplied address is a valid virtual
124 * address for the given process. 90 * address for the given process.
125 * 91 *
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 47d9ecf9a..39306509a 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -148,9 +148,4 @@ void RestoreGlobalState(bool is_powered_on) {
148 values.motion_enabled.SetGlobal(true); 148 values.motion_enabled.SetGlobal(true);
149} 149}
150 150
151void Sanitize() {
152 values.use_asynchronous_gpu_emulation.SetValue(
153 values.use_asynchronous_gpu_emulation.GetValue() || values.use_multi_core.GetValue());
154}
155
156} // namespace Settings 151} // namespace Settings
diff --git a/src/core/settings.h b/src/core/settings.h
index d5f8d2b7e..a324530bd 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -131,6 +131,7 @@ struct Values {
131 131
132 bool cpuopt_unsafe_unfuse_fma; 132 bool cpuopt_unsafe_unfuse_fma;
133 bool cpuopt_unsafe_reduce_fp_error; 133 bool cpuopt_unsafe_reduce_fp_error;
134 bool cpuopt_unsafe_inaccurate_nan;
134 135
135 // Renderer 136 // Renderer
136 Setting<RendererBackend> renderer_backend; 137 Setting<RendererBackend> renderer_backend;
@@ -221,7 +222,7 @@ struct Values {
221 bool disable_macro_jit; 222 bool disable_macro_jit;
222 bool extended_logging; 223 bool extended_logging;
223 224
224 // Misceallaneous 225 // Miscellaneous
225 std::string log_filter; 226 std::string log_filter;
226 bool use_dev_keys; 227 bool use_dev_keys;
227 228
@@ -257,7 +258,4 @@ void LogSettings();
257// Restore the global state of all applicable settings in the Values struct 258// Restore the global state of all applicable settings in the Values struct
258void RestoreGlobalState(bool is_powered_on); 259void RestoreGlobalState(bool is_powered_on);
259 260
260// Fixes settings that are known to cause issues with the emulator
261void Sanitize();
262
263} // namespace Settings 261} // namespace Settings
diff --git a/src/input_common/gcadapter/gc_adapter.h b/src/input_common/gcadapter/gc_adapter.h
index f1256c9da..7a6c545bd 100644
--- a/src/input_common/gcadapter/gc_adapter.h
+++ b/src/input_common/gcadapter/gc_adapter.h
@@ -120,17 +120,17 @@ private:
120 /// For use in initialization, querying devices to find the adapter 120 /// For use in initialization, querying devices to find the adapter
121 void Setup(); 121 void Setup();
122 122
123 /// Resets status of all GC controller devices to a disconected state 123 /// Resets status of all GC controller devices to a disconnected state
124 void ResetDevices(); 124 void ResetDevices();
125 125
126 /// Resets status of device connected to a disconected state 126 /// Resets status of device connected to a disconnected state
127 void ResetDevice(std::size_t port); 127 void ResetDevice(std::size_t port);
128 128
129 /// Returns true if we successfully gain access to GC Adapter 129 /// Returns true if we successfully gain access to GC Adapter
130 bool CheckDeviceAccess(); 130 bool CheckDeviceAccess();
131 131
132 /// Captures GC Adapter endpoint address 132 /// Captures GC Adapter endpoint address
133 /// Returns true if the endpoind was set correctly 133 /// Returns true if the endpoint was set correctly
134 bool GetGCEndpoint(libusb_device* device); 134 bool GetGCEndpoint(libusb_device* device);
135 135
136 /// For shutting down, clear all data, join all threads, release usb 136 /// For shutting down, clear all data, join all threads, release usb
diff --git a/src/input_common/gcadapter/gc_poller.cpp b/src/input_common/gcadapter/gc_poller.cpp
index 4d1052414..9670bdeb2 100644
--- a/src/input_common/gcadapter/gc_poller.cpp
+++ b/src/input_common/gcadapter/gc_poller.cpp
@@ -139,10 +139,10 @@ void GCButtonFactory::EndConfiguration() {
139 139
140class GCAnalog final : public Input::AnalogDevice { 140class GCAnalog final : public Input::AnalogDevice {
141public: 141public:
142 explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, 142 explicit GCAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_,
143 const GCAdapter::Adapter* adapter, float range_) 143 float deadzone_, float range_, const GCAdapter::Adapter* adapter)
144 : port(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), gcadapter(adapter), 144 : port(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_),
145 range(range_) {} 145 deadzone(deadzone_), range(range_), gcadapter(adapter) {}
146 146
147 float GetAxis(u32 axis) const { 147 float GetAxis(u32 axis) const {
148 if (gcadapter->DeviceConnected(port)) { 148 if (gcadapter->DeviceConnected(port)) {
@@ -157,7 +157,12 @@ public:
157 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { 157 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
158 float x = GetAxis(analog_axis_x); 158 float x = GetAxis(analog_axis_x);
159 float y = GetAxis(analog_axis_y); 159 float y = GetAxis(analog_axis_y);
160 160 if (invert_x) {
161 x = -x;
162 }
163 if (invert_y) {
164 y = -y;
165 }
161 // Make sure the coordinates are in the unit circle, 166 // Make sure the coordinates are in the unit circle,
162 // otherwise normalize it. 167 // otherwise normalize it.
163 float r = x * x + y * y; 168 float r = x * x + y * y;
@@ -200,9 +205,11 @@ private:
200 const u32 port; 205 const u32 port;
201 const u32 axis_x; 206 const u32 axis_x;
202 const u32 axis_y; 207 const u32 axis_y;
208 const bool invert_x;
209 const bool invert_y;
203 const float deadzone; 210 const float deadzone;
204 const GCAdapter::Adapter* gcadapter;
205 const float range; 211 const float range;
212 const GCAdapter::Adapter* gcadapter;
206 mutable std::mutex mutex; 213 mutable std::mutex mutex;
207}; 214};
208 215
@@ -223,8 +230,13 @@ std::unique_ptr<Input::AnalogDevice> GCAnalogFactory::Create(const Common::Param
223 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); 230 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1));
224 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); 231 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
225 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); 232 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
233 const std::string invert_x_value = params.Get("invert_x", "+");
234 const std::string invert_y_value = params.Get("invert_y", "+");
235 const bool invert_x = invert_x_value == "-";
236 const bool invert_y = invert_y_value == "-";
226 237
227 return std::make_unique<GCAnalog>(port, axis_x, axis_y, deadzone, adapter.get(), range); 238 return std::make_unique<GCAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range,
239 adapter.get());
228} 240}
229 241
230void GCAnalogFactory::BeginConfiguration() { 242void GCAnalogFactory::BeginConfiguration() {
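
Inversion is serialized through the same ParamPackage convention as the other analog parameters: "+" (the default) leaves an axis alone and "-" flips it. An illustrative round-trip, with a hypothetical serialized string:

    Common::ParamPackage params{"engine:gcpad,port:0,axis_x:0,axis_y:1,invert_x:-,invert_y:+"};
    const bool invert_x = params.Get("invert_x", "+") == "-"; // true: flip the X axis
    const bool invert_y = params.Get("invert_y", "+") == "-"; // false: leave Y as-is

The same parse appears below for the mouse and SDL backends, so a mapping saved by any one of them stays readable by the others.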
@@ -282,6 +294,8 @@ Common::ParamPackage GCAnalogFactory::GetNextInput() {
282 params.Set("port", controller_number); 294 params.Set("port", controller_number);
283 params.Set("axis_x", analog_x_axis); 295 params.Set("axis_x", analog_x_axis);
284 params.Set("axis_y", analog_y_axis); 296 params.Set("axis_y", analog_y_axis);
297 params.Set("invert_x", "+");
298 params.Set("invert_y", "+");
285 analog_x_axis = -1; 299 analog_x_axis = -1;
286 analog_y_axis = -1; 300 analog_y_axis = -1;
287 controller_number = -1; 301 controller_number = -1;
diff --git a/src/input_common/motion_input.cpp b/src/input_common/motion_input.cpp
index f77ba535d..6a65f175e 100644
--- a/src/input_common/motion_input.cpp
+++ b/src/input_common/motion_input.cpp
@@ -129,7 +129,7 @@ void MotionInput::UpdateOrientation(u64 elapsed_time) {
129 rad_gyro += ki * integral_error; 129 rad_gyro += ki * integral_error;
130 rad_gyro += kd * derivative_error; 130 rad_gyro += kd * derivative_error;
131 } else { 131 } else {
132 // Give more weight to acelerometer values to compensate for the lack of gyro 132 // Give more weight to accelerometer values to compensate for the lack of gyro
133 rad_gyro += 35.0f * kp * real_error; 133 rad_gyro += 35.0f * kp * real_error;
134 rad_gyro += 10.0f * ki * integral_error; 134 rad_gyro += 10.0f * ki * integral_error;
135 rad_gyro += 10.0f * kd * derivative_error; 135 rad_gyro += 10.0f * kd * derivative_error;
diff --git a/src/input_common/mouse/mouse_input.h b/src/input_common/mouse/mouse_input.h
index 65e64bee7..58803c1bf 100644
--- a/src/input_common/mouse/mouse_input.h
+++ b/src/input_common/mouse/mouse_input.h
@@ -20,7 +20,7 @@ enum class MouseButton {
20 Left, 20 Left,
21 Wheel, 21 Wheel,
22 Right, 22 Right,
23 Foward, 23 Forward,
24 Backward, 24 Backward,
25 Undefined, 25 Undefined,
26}; 26};
diff --git a/src/input_common/mouse/mouse_poller.cpp b/src/input_common/mouse/mouse_poller.cpp
index 7445ad3ad..508eb0c7d 100644
--- a/src/input_common/mouse/mouse_poller.cpp
+++ b/src/input_common/mouse/mouse_poller.cpp
@@ -62,10 +62,10 @@ void MouseButtonFactory::EndConfiguration() {
62 62
63class MouseAnalog final : public Input::AnalogDevice { 63class MouseAnalog final : public Input::AnalogDevice {
64public: 64public:
65 explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, float deadzone_, float range_, 65 explicit MouseAnalog(u32 port_, u32 axis_x_, u32 axis_y_, bool invert_x_, bool invert_y_,
66 const MouseInput::Mouse* mouse_input_) 66 float deadzone_, float range_, const MouseInput::Mouse* mouse_input_)
67 : button(port_), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), range(range_), 67 : button(port_), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_), invert_y(invert_y_),
68 mouse_input(mouse_input_) {} 68 deadzone(deadzone_), range(range_), mouse_input(mouse_input_) {}
69 69
70 float GetAxis(u32 axis) const { 70 float GetAxis(u32 axis) const {
71 std::lock_guard lock{mutex}; 71 std::lock_guard lock{mutex};
@@ -77,6 +77,12 @@ public:
77 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const { 77 std::pair<float, float> GetAnalog(u32 analog_axis_x, u32 analog_axis_y) const {
78 float x = GetAxis(analog_axis_x); 78 float x = GetAxis(analog_axis_x);
79 float y = GetAxis(analog_axis_y); 79 float y = GetAxis(analog_axis_y);
80 if (invert_x) {
81 x = -x;
82 }
83 if (invert_y) {
84 y = -y;
85 }
80 86
81 // Make sure the coordinates are in the unit circle, 87 // Make sure the coordinates are in the unit circle,
82 // otherwise normalize it. 88 // otherwise normalize it.
@@ -104,6 +110,8 @@ private:
104 const u32 button; 110 const u32 button;
105 const u32 axis_x; 111 const u32 axis_x;
106 const u32 axis_y; 112 const u32 axis_y;
113 const bool invert_x;
114 const bool invert_y;
107 const float deadzone; 115 const float deadzone;
108 const float range; 116 const float range;
109 const MouseInput::Mouse* mouse_input; 117 const MouseInput::Mouse* mouse_input;
@@ -128,8 +136,13 @@ std::unique_ptr<Input::AnalogDevice> MouseAnalogFactory::Create(
128 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1)); 136 const auto axis_y = static_cast<u32>(params.Get("axis_y", 1));
129 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); 137 const auto deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
130 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); 138 const auto range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
139 const std::string invert_x_value = params.Get("invert_x", "+");
140 const std::string invert_y_value = params.Get("invert_y", "+");
141 const bool invert_x = invert_x_value == "-";
142 const bool invert_y = invert_y_value == "-";
131 143
132 return std::make_unique<MouseAnalog>(port, axis_x, axis_y, deadzone, range, mouse_input.get()); 144 return std::make_unique<MouseAnalog>(port, axis_x, axis_y, invert_x, invert_y, deadzone, range,
145 mouse_input.get());
133} 146}
134 147
135void MouseAnalogFactory::BeginConfiguration() { 148void MouseAnalogFactory::BeginConfiguration() {
@@ -153,6 +166,8 @@ Common::ParamPackage MouseAnalogFactory::GetNextInput() const {
153 params.Set("port", static_cast<u16>(pad.button)); 166 params.Set("port", static_cast<u16>(pad.button));
154 params.Set("axis_x", 0); 167 params.Set("axis_x", 0);
155 params.Set("axis_y", 1); 168 params.Set("axis_y", 1);
169 params.Set("invert_x", "+");
170 params.Set("invert_y", "+");
156 return params; 171 return params;
157 } 172 }
158 } 173 }
diff --git a/src/input_common/sdl/sdl_impl.cpp b/src/input_common/sdl/sdl_impl.cpp
index 7827e324c..d32eb732a 100644
--- a/src/input_common/sdl/sdl_impl.cpp
+++ b/src/input_common/sdl/sdl_impl.cpp
@@ -352,13 +352,20 @@ private:
352class SDLAnalog final : public Input::AnalogDevice { 352class SDLAnalog final : public Input::AnalogDevice {
353public: 353public:
354 explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_, 354 explicit SDLAnalog(std::shared_ptr<SDLJoystick> joystick_, int axis_x_, int axis_y_,
355 float deadzone_, float range_) 355 bool invert_x_, bool invert_y_, float deadzone_, float range_)
356 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), deadzone(deadzone_), 356 : joystick(std::move(joystick_)), axis_x(axis_x_), axis_y(axis_y_), invert_x(invert_x_),
357 range(range_) {} 357 invert_y(invert_y_), deadzone(deadzone_), range(range_) {}
358 358
359 std::tuple<float, float> GetStatus() const override { 359 std::tuple<float, float> GetStatus() const override {
360 const auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range); 360 auto [x, y] = joystick->GetAnalog(axis_x, axis_y, range);
361 const float r = std::sqrt((x * x) + (y * y)); 361 const float r = std::sqrt((x * x) + (y * y));
362 if (invert_x) {
363 x = -x;
364 }
365 if (invert_y) {
366 y = -y;
367 }
368
362 if (r > deadzone) { 369 if (r > deadzone) {
363 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone), 370 return std::make_tuple(x / r * (r - deadzone) / (1 - deadzone),
364 y / r * (r - deadzone) / (1 - deadzone)); 371 y / r * (r - deadzone) / (1 - deadzone));
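
Note the ordering above: x and y are flipped after r is computed, which is safe because inversion only changes sign, never magnitude. For reference, the deadzone mapping rescales the radial distance so output starts at 0 on the deadzone boundary and reaches 1 at full deflection; a self-contained sketch of that rescale (hypothetical helper name):

    #include <cmath>
    #include <utility>

    // Magnitudes in [deadzone, 1] map linearly onto [0, 1]; inside the deadzone is zero.
    std::pair<float, float> ApplyRadialDeadzone(float x, float y, float deadzone) {
        const float r = std::sqrt(x * x + y * y);
        if (r <= deadzone) {
            return {0.0f, 0.0f};
        }
        const float scale = (r - deadzone) / (1.0f - deadzone);
        return {x / r * scale, y / r * scale};
    }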
@@ -386,6 +393,8 @@ private:
386 std::shared_ptr<SDLJoystick> joystick; 393 std::shared_ptr<SDLJoystick> joystick;
387 const int axis_x; 394 const int axis_x;
388 const int axis_y; 395 const int axis_y;
396 const bool invert_x;
397 const bool invert_y;
389 const float deadzone; 398 const float deadzone;
390 const float range; 399 const float range;
391}; 400};
@@ -572,12 +581,17 @@ public:
572 const int axis_y = params.Get("axis_y", 1); 581 const int axis_y = params.Get("axis_y", 1);
573 const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f); 582 const float deadzone = std::clamp(params.Get("deadzone", 0.0f), 0.0f, 1.0f);
574 const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f); 583 const float range = std::clamp(params.Get("range", 1.0f), 0.50f, 1.50f);
584 const std::string invert_x_value = params.Get("invert_x", "+");
585 const std::string invert_y_value = params.Get("invert_y", "+");
586 const bool invert_x = invert_x_value == "-";
587 const bool invert_y = invert_y_value == "-";
575 auto joystick = state.GetSDLJoystickByGUID(guid, port); 588 auto joystick = state.GetSDLJoystickByGUID(guid, port);
576 589
577 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash 590 // This is necessary so accessing GetAxis with axis_x and axis_y won't crash
578 joystick->SetAxis(axis_x, 0); 591 joystick->SetAxis(axis_x, 0);
579 joystick->SetAxis(axis_y, 0); 592 joystick->SetAxis(axis_y, 0);
580 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, deadzone, range); 593 return std::make_unique<SDLAnalog>(joystick, axis_x, axis_y, invert_x, invert_y, deadzone,
594 range);
581 } 595 }
582 596
583private: 597private:
@@ -886,6 +900,8 @@ Common::ParamPackage BuildParamPackageForAnalog(int port, const std::string& gui
886 params.Set("guid", guid); 900 params.Set("guid", guid);
887 params.Set("axis_x", axis_x); 901 params.Set("axis_x", axis_x);
888 params.Set("axis_y", axis_y); 902 params.Set("axis_y", axis_y);
903 params.Set("invert_x", "+");
904 params.Set("invert_y", "+");
889 return params; 905 return params;
890} 906}
891} // Anonymous namespace 907} // Anonymous namespace
@@ -1014,11 +1030,44 @@ public:
1014 } 1030 }
1015 return {}; 1031 return {};
1016 } 1032 }
1017 [[nodiscard]] std::optional<Common::ParamPackage> FromEvent(const SDL_Event& event) const { 1033 [[nodiscard]] std::optional<Common::ParamPackage> FromEvent(SDL_Event& event) {
1018 switch (event.type) { 1034 switch (event.type) {
1019 case SDL_JOYAXISMOTION: 1035 case SDL_JOYAXISMOTION:
1020 if (std::abs(event.jaxis.value / 32767.0) < 0.5) { 1036 if (!axis_memory.count(event.jaxis.which) ||
1037 !axis_memory[event.jaxis.which].count(event.jaxis.axis)) {
1038 axis_memory[event.jaxis.which][event.jaxis.axis] = event.jaxis.value;
1039 axis_event_count[event.jaxis.which][event.jaxis.axis] = 1;
1021 break; 1040 break;
1041 } else {
1042 axis_event_count[event.jaxis.which][event.jaxis.axis]++;
1043 // The joystick and axis exist in our map if we take this branch, so no checks
1044 // needed
1045 if (std::abs(
1046 (event.jaxis.value - axis_memory[event.jaxis.which][event.jaxis.axis]) /
1047 32767.0) < 0.5) {
1048 break;
1049 } else {
1050 if (axis_event_count[event.jaxis.which][event.jaxis.axis] == 2 &&
1051 IsAxisAtPole(event.jaxis.value) &&
1052 IsAxisAtPole(axis_memory[event.jaxis.which][event.jaxis.axis])) {
1053 // If we have exactly two events and both are near a pole, this is
1054 // likely a digital input masquerading as an analog axis. Instead of
1055 // trying to look at the direction the axis travelled, assume the first
1056 // event was a press and the second a release. This should handle most
1057 // digital axes while deferring to the direction of travel for analog
1058 // axes.
1059 event.jaxis.value = static_cast<Sint16>(
1060 std::copysign(32767, axis_memory[event.jaxis.which][event.jaxis.axis]));
1061 } else {
1062 // There are more than two events, so this is likely a true analog axis;
1063 // check the direction it travelled.
1064 event.jaxis.value = static_cast<Sint16>(std::copysign(
1065 32767,
1066 event.jaxis.value - axis_memory[event.jaxis.which][event.jaxis.axis]));
1067 }
1068 axis_memory.clear();
1069 axis_event_count.clear();
1070 }
1022 } 1071 }
1023 [[fallthrough]]; 1072 [[fallthrough]];
1024 case SDL_JOYBUTTONUP: 1073 case SDL_JOYBUTTONUP:
@@ -1027,6 +1076,16 @@ public:
1027 } 1076 }
1028 return std::nullopt; 1077 return std::nullopt;
1029 } 1078 }
1079
1080private:
1081 // Determine whether an axis value is close to an extreme or center
1082 // Some controllers have a digital D-Pad as a pair of analog sticks, with 3 possible values per
1083 // axis, which is why the center must be considered a pole
1084 bool IsAxisAtPole(int16_t value) const {
1085 return std::abs(value) >= 32767 || std::abs(value) < 327;
1086 }
1087 std::unordered_map<SDL_JoystickID, std::unordered_map<uint8_t, int16_t>> axis_memory;
1088 std::unordered_map<SDL_JoystickID, std::unordered_map<uint8_t, uint32_t>> axis_event_count;
1030}; 1089};
1031 1090
1032class SDLMotionPoller final : public SDLPoller { 1091class SDLMotionPoller final : public SDLPoller {
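
The heuristic above can be exercised in isolation. A standalone sketch with a hypothetical ClassifyAxis wrapper (not part of the diff): exactly two events, both at a pole, are treated as a digital press/release pair that keeps the press direction, while longer runs are classified by the direction of travel.

    #include <cmath>
    #include <cstdint>
    #include <cstdlib>

    // Near full deflection or the exact center counts as a pole (~1% tolerance).
    bool IsAxisAtPole(std::int16_t value) {
        return std::abs(value) >= 32767 || std::abs(value) < 327;
    }

    std::int16_t ClassifyAxis(std::int16_t first, std::int16_t second, std::uint32_t event_count) {
        if (event_count == 2 && IsAxisAtPole(first) && IsAxisAtPole(second)) {
            // Digital pad: assume press then release; keep the press direction.
            return static_cast<std::int16_t>(std::copysign(32767, first));
        }
        // Analog stick: follow the direction the axis travelled.
        return static_cast<std::int16_t>(std::copysign(32767, second - first));
    }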
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index 17a9225d7..412d57896 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -225,6 +225,11 @@ void Client::OnPortInfo([[maybe_unused]] Response::PortInfo data) {
225} 225}
226 226
227void Client::OnPadData(Response::PadData data, std::size_t client) { 227void Client::OnPadData(Response::PadData data, std::size_t client) {
228 // Accept packets only for the correct pad
229 if (static_cast<u8>(clients[client].pad_index) != data.info.id) {
230 return;
231 }
232
228 LOG_TRACE(Input, "PadData packet received"); 233 LOG_TRACE(Input, "PadData packet received");
229 if (data.packet_counter == clients[client].packet_sequence) { 234 if (data.packet_counter == clients[client].packet_sequence) {
230 LOG_WARNING( 235 LOG_WARNING(
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp
index 8686a059c..c5da27a38 100644
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -28,14 +28,14 @@ private:
28 mutable std::mutex mutex; 28 mutable std::mutex mutex;
29}; 29};
30 30
31/// A motion device factory that creates motion devices from JC Adapter 31/// A motion device factory that creates motion devices from a UDP client
32UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_) 32UDPMotionFactory::UDPMotionFactory(std::shared_ptr<CemuhookUDP::Client> client_)
33 : client(std::move(client_)) {} 33 : client(std::move(client_)) {}
34 34
35/** 35/**
36 * Creates motion device 36 * Creates motion device
37 * @param params contains parameters for creating the device: 37 * @param params contains parameters for creating the device:
38 * - "port": the nth jcpad on the adapter 38 * - "port": the UDP port number
39 */ 39 */
40std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) { 40std::unique_ptr<Input::MotionDevice> UDPMotionFactory::Create(const Common::ParamPackage& params) {
41 auto ip = params.Get("ip", "127.0.0.1"); 41 auto ip = params.Get("ip", "127.0.0.1");
@@ -90,14 +90,14 @@ private:
90 mutable std::mutex mutex; 90 mutable std::mutex mutex;
91}; 91};
92 92
93/// A motion device factory that creates motion devices from JC Adapter 93/// A motion device factory that creates motion devices from a UDP client
94UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_) 94UDPTouchFactory::UDPTouchFactory(std::shared_ptr<CemuhookUDP::Client> client_)
95 : client(std::move(client_)) {} 95 : client(std::move(client_)) {}
96 96
97/** 97/**
98 * Creates motion device 98 * Creates motion device
99 * @param params contains parameters for creating the device: 99 * @param params contains parameters for creating the device:
100 * - "port": the nth jcpad on the adapter 100 * - "port": the UDP port number
101 */ 101 */
102std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) { 102std::unique_ptr<Input::TouchDevice> UDPTouchFactory::Create(const Common::ParamPackage& params) {
103 auto ip = params.Get("ip", "127.0.0.1"); 103 auto ip = params.Get("ip", "127.0.0.1");
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index d80b0b688..8a606b448 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -4,8 +4,6 @@ add_executable(tests
4 common/fibers.cpp 4 common/fibers.cpp
5 common/param_package.cpp 5 common/param_package.cpp
6 common/ring_buffer.cpp 6 common/ring_buffer.cpp
7 core/arm/arm_test_common.cpp
8 core/arm/arm_test_common.h
9 core/core_timing.cpp 7 core/core_timing.cpp
10 tests.cpp 8 tests.cpp
11) 9)
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
index 4757dd2b4..d94492fc6 100644
--- a/src/tests/common/fibers.cpp
+++ b/src/tests/common/fibers.cpp
@@ -207,7 +207,7 @@ static void ThreadStart2_2(u32 id, TestControl2& test_control) {
207} 207}
208 208
209 /** This test checks the fiber thread exchange configuration and validates 209 /** This test checks the fiber thread exchange configuration and validates
210 * that a fiber has been succesfully transfered from one thread to another and that the TLS 210 * that a fiber has been successfully transferred from one thread to another and that the TLS
211 * region of the thread is kept while changing fibers. 211 * region of the thread is kept while changing fibers.
212 */ 212 */
213TEST_CASE("Fibers::InterExchange", "[common]") { 213TEST_CASE("Fibers::InterExchange", "[common]") {
@@ -299,7 +299,7 @@ static void ThreadStart3(u32 id, TestControl3& test_control) {
299} 299}
300 300
301 /** This test checks for two threads racing to start the same fiber. 301 /** This test checks for two threads racing to start the same fiber.
302 * It checks execution occured in an ordered manner and that at no time were there 302 * It checks execution occurred in an ordered manner and that at no time were there
303 * two contexts at the same time. 303 * two contexts at the same time.
304 */ 304 */
305TEST_CASE("Fibers::StartRace", "[common]") { 305TEST_CASE("Fibers::StartRace", "[common]") {
diff --git a/src/tests/common/ring_buffer.cpp b/src/tests/common/ring_buffer.cpp
index c883c4d56..54def22da 100644
--- a/src/tests/common/ring_buffer.cpp
+++ b/src/tests/common/ring_buffer.cpp
@@ -20,60 +20,60 @@ TEST_CASE("RingBuffer: Basic Tests", "[common]") {
20 for (std::size_t i = 0; i < 4; i++) { 20 for (std::size_t i = 0; i < 4; i++) {
21 const char elem = static_cast<char>(i); 21 const char elem = static_cast<char>(i);
22 const std::size_t count = buf.Push(&elem, 1); 22 const std::size_t count = buf.Push(&elem, 1);
23 REQUIRE(count == 1); 23 REQUIRE(count == 1U);
24 } 24 }
25 25
26 REQUIRE(buf.Size() == 4); 26 REQUIRE(buf.Size() == 4U);
27 27
28 // Pushing values into a full ring buffer should fail. 28 // Pushing values into a full ring buffer should fail.
29 { 29 {
30 const char elem = static_cast<char>(42); 30 const char elem = static_cast<char>(42);
31 const std::size_t count = buf.Push(&elem, 1); 31 const std::size_t count = buf.Push(&elem, 1);
32 REQUIRE(count == 0); 32 REQUIRE(count == 0U);
33 } 33 }
34 34
35 REQUIRE(buf.Size() == 4); 35 REQUIRE(buf.Size() == 4U);
36 36
37 // Popping multiple values from a ring buffer with values should succeed. 37 // Popping multiple values from a ring buffer with values should succeed.
38 { 38 {
39 const std::vector<char> popped = buf.Pop(2); 39 const std::vector<char> popped = buf.Pop(2);
40 REQUIRE(popped.size() == 2); 40 REQUIRE(popped.size() == 2U);
41 REQUIRE(popped[0] == 0); 41 REQUIRE(popped[0] == 0);
42 REQUIRE(popped[1] == 1); 42 REQUIRE(popped[1] == 1);
43 } 43 }
44 44
45 REQUIRE(buf.Size() == 2); 45 REQUIRE(buf.Size() == 2U);
46 46
47 // Popping a single value from a ring buffer with values should succeed. 47 // Popping a single value from a ring buffer with values should succeed.
48 { 48 {
49 const std::vector<char> popped = buf.Pop(1); 49 const std::vector<char> popped = buf.Pop(1);
50 REQUIRE(popped.size() == 1); 50 REQUIRE(popped.size() == 1U);
51 REQUIRE(popped[0] == 2); 51 REQUIRE(popped[0] == 2);
52 } 52 }
53 53
54 REQUIRE(buf.Size() == 1); 54 REQUIRE(buf.Size() == 1U);
55 55
56 // Pushing more values than space available should partially succeed. 56 // Pushing more values than space available should partially succeed.
57 { 57 {
58 std::vector<char> to_push(6); 58 std::vector<char> to_push(6);
59 std::iota(to_push.begin(), to_push.end(), 88); 59 std::iota(to_push.begin(), to_push.end(), 88);
60 const std::size_t count = buf.Push(to_push); 60 const std::size_t count = buf.Push(to_push);
61 REQUIRE(count == 3); 61 REQUIRE(count == 3U);
62 } 62 }
63 63
64 REQUIRE(buf.Size() == 4); 64 REQUIRE(buf.Size() == 4U);
65 65
66 // Doing an unlimited pop should pop all values. 66 // Doing an unlimited pop should pop all values.
67 { 67 {
68 const std::vector<char> popped = buf.Pop(); 68 const std::vector<char> popped = buf.Pop();
69 REQUIRE(popped.size() == 4); 69 REQUIRE(popped.size() == 4U);
70 REQUIRE(popped[0] == 3); 70 REQUIRE(popped[0] == 3);
71 REQUIRE(popped[1] == 88); 71 REQUIRE(popped[1] == 88);
72 REQUIRE(popped[2] == 89); 72 REQUIRE(popped[2] == 89);
73 REQUIRE(popped[3] == 90); 73 REQUIRE(popped[3] == 90);
74 } 74 }
75 75
76 REQUIRE(buf.Size() == 0); 76 REQUIRE(buf.Size() == 0U);
77} 77}
78 78
79TEST_CASE("RingBuffer: Threaded Test", "[common]") { 79TEST_CASE("RingBuffer: Threaded Test", "[common]") {
@@ -93,7 +93,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
93 std::size_t i = 0; 93 std::size_t i = 0;
94 while (i < count) { 94 while (i < count) {
95 if (const std::size_t c = buf.Push(&value[0], 1); c > 0) { 95 if (const std::size_t c = buf.Push(&value[0], 1); c > 0) {
96 REQUIRE(c == 1); 96 REQUIRE(c == 1U);
97 i++; 97 i++;
98 next_value(value); 98 next_value(value);
99 } else { 99 } else {
@@ -108,7 +108,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
108 std::size_t i = 0; 108 std::size_t i = 0;
109 while (i < count) { 109 while (i < count) {
110 if (const std::vector<char> v = buf.Pop(1); v.size() > 0) { 110 if (const std::vector<char> v = buf.Pop(1); v.size() > 0) {
111 REQUIRE(v.size() == 2); 111 REQUIRE(v.size() == 2U);
112 REQUIRE(v[0] == value[0]); 112 REQUIRE(v[0] == value[0]);
113 REQUIRE(v[1] == value[1]); 113 REQUIRE(v[1] == value[1]);
114 i++; 114 i++;
@@ -123,7 +123,7 @@ TEST_CASE("RingBuffer: Threaded Test", "[common]") {
123 producer.join(); 123 producer.join();
124 consumer.join(); 124 consumer.join();
125 125
126 REQUIRE(buf.Size() == 0); 126 REQUIRE(buf.Size() == 0U);
127 printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty); 127 printf("RingBuffer: Threaded Test: full: %zu, empty: %zu\n", full, empty);
128} 128}
129 129
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
deleted file mode 100644
index e54674d11..000000000
--- a/src/tests/core/arm/arm_test_common.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/page_table.h"
8#include "core/core.h"
9#include "core/hle/kernel/memory/page_table.h"
10#include "core/hle/kernel/process.h"
11#include "core/memory.h"
12#include "tests/core/arm/arm_test_common.h"
13
14namespace ArmTests {
15
16TestEnvironment::TestEnvironment(bool mutable_memory_)
17 : mutable_memory(mutable_memory_),
18 test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} {
19 auto& system = Core::System::GetInstance();
20
21 auto process = Kernel::Process::Create(system, "", Kernel::Process::ProcessType::Userland);
22 page_table = &process->PageTable().PageTableImpl();
23
24 system.Memory().MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
25 system.Memory().MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
26
27 kernel.MakeCurrentProcess(process.get());
28}
29
30TestEnvironment::~TestEnvironment() {
31 auto& system = Core::System::GetInstance();
32 system.Memory().UnmapRegion(*page_table, 0x80000000, 0x80000000);
33 system.Memory().UnmapRegion(*page_table, 0x00000000, 0x80000000);
34}
35
36void TestEnvironment::SetMemory64(VAddr vaddr, u64 value) {
37 SetMemory32(vaddr + 0, static_cast<u32>(value));
38 SetMemory32(vaddr + 4, static_cast<u32>(value >> 32));
39}
40
41void TestEnvironment::SetMemory32(VAddr vaddr, u32 value) {
42 SetMemory16(vaddr + 0, static_cast<u16>(value));
43 SetMemory16(vaddr + 2, static_cast<u16>(value >> 16));
44}
45
46void TestEnvironment::SetMemory16(VAddr vaddr, u16 value) {
47 SetMemory8(vaddr + 0, static_cast<u8>(value));
48 SetMemory8(vaddr + 1, static_cast<u8>(value >> 8));
49}
50
51void TestEnvironment::SetMemory8(VAddr vaddr, u8 value) {
52 test_memory->data[vaddr] = value;
53}
54
55std::vector<WriteRecord> TestEnvironment::GetWriteRecords() const {
56 return write_records;
57}
58
59void TestEnvironment::ClearWriteRecords() {
60 write_records.clear();
61}
62
63TestEnvironment::TestMemory::~TestMemory() {}
64
65std::optional<bool> TestEnvironment::TestMemory::IsValidAddress(VAddr addr) {
66 return true;
67}
68
69std::optional<u8> TestEnvironment::TestMemory::Read8(VAddr addr) {
70 const auto iter = data.find(addr);
71
72 if (iter == data.end()) {
73 // Some arbitrary data
74 return static_cast<u8>(addr);
75 }
76
77 return iter->second;
78}
79
80std::optional<u16> TestEnvironment::TestMemory::Read16(VAddr addr) {
81 return *Read8(addr) | static_cast<u16>(*Read8(addr + 1)) << 8;
82}
83
84std::optional<u32> TestEnvironment::TestMemory::Read32(VAddr addr) {
85 return *Read16(addr) | static_cast<u32>(*Read16(addr + 2)) << 16;
86}
87
88std::optional<u64> TestEnvironment::TestMemory::Read64(VAddr addr) {
89 return *Read32(addr) | static_cast<u64>(*Read32(addr + 4)) << 32;
90}
91
92bool TestEnvironment::TestMemory::ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) {
93 VAddr addr = src_addr;
94 u8* data = static_cast<u8*>(dest_buffer);
95
96 for (std::size_t i = 0; i < size; i++, addr++, data++) {
97 *data = *Read8(addr);
98 }
99
100 return true;
101}
102
103bool TestEnvironment::TestMemory::Write8(VAddr addr, u8 data) {
104 env->write_records.emplace_back(8, addr, data);
105 if (env->mutable_memory)
106 env->SetMemory8(addr, data);
107 return true;
108}
109
110bool TestEnvironment::TestMemory::Write16(VAddr addr, u16 data) {
111 env->write_records.emplace_back(16, addr, data);
112 if (env->mutable_memory)
113 env->SetMemory16(addr, data);
114 return true;
115}
116
117bool TestEnvironment::TestMemory::Write32(VAddr addr, u32 data) {
118 env->write_records.emplace_back(32, addr, data);
119 if (env->mutable_memory)
120 env->SetMemory32(addr, data);
121 return true;
122}
123
124bool TestEnvironment::TestMemory::Write64(VAddr addr, u64 data) {
125 env->write_records.emplace_back(64, addr, data);
126 if (env->mutable_memory)
127 env->SetMemory64(addr, data);
128 return true;
129}
130
131bool TestEnvironment::TestMemory::WriteBlock(VAddr dest_addr, const void* src_buffer,
132 std::size_t size) {
133 VAddr addr = dest_addr;
134 const u8* data = static_cast<const u8*>(src_buffer);
135
136 for (std::size_t i = 0; i < size; i++, addr++, data++) {
137 env->write_records.emplace_back(8, addr, *data);
138 if (env->mutable_memory)
139 env->SetMemory8(addr, *data);
140 }
141
142 return true;
143}
144
145} // namespace ArmTests
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
deleted file mode 100644
index d145dbfcc..000000000
--- a/src/tests/core/arm/arm_test_common.h
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <tuple>
8#include <unordered_map>
9#include <vector>
10
11#include "common/common_types.h"
12#include "common/memory_hook.h"
13#include "core/hle/kernel/kernel.h"
14
15namespace Common {
16struct PageTable;
17}
18
19namespace ArmTests {
20
21struct WriteRecord {
22 WriteRecord(std::size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {}
23 std::size_t size;
24 VAddr addr;
25 u64 data;
26 bool operator==(const WriteRecord& o) const {
27 return std::tie(size, addr, data) == std::tie(o.size, o.addr, o.data);
28 }
29};
30
31class TestEnvironment final {
32public:
33 /*
34 * Inititalise test environment
35 * @param mutable_memory If false, writes to memory can never be read back.
36 * (Memory is immutable.)
37 */
38 explicit TestEnvironment(bool mutable_memory = false);
39
40 /// Shutdown test environment
41 ~TestEnvironment();
42
43 /// Sets value at memory location vaddr.
44 void SetMemory8(VAddr vaddr, u8 value);
45 void SetMemory16(VAddr vaddr, u16 value);
46 void SetMemory32(VAddr vaddr, u32 value);
47 void SetMemory64(VAddr vaddr, u64 value);
48
49 /**
50 * Whenever Memory::Write{8,16,32,64} is called within the test environment,
51 * a new write-record is made.
52 * @returns A vector of write records made since they were last cleared.
53 */
54 std::vector<WriteRecord> GetWriteRecords() const;
55
56 /// Empties the internal write-record store.
57 void ClearWriteRecords();
58
59private:
60 friend struct TestMemory;
61 struct TestMemory final : Common::MemoryHook {
62 explicit TestMemory(TestEnvironment* env_) : env(env_) {}
63 TestEnvironment* env;
64
65 ~TestMemory() override;
66
67 std::optional<bool> IsValidAddress(VAddr addr) override;
68
69 std::optional<u8> Read8(VAddr addr) override;
70 std::optional<u16> Read16(VAddr addr) override;
71 std::optional<u32> Read32(VAddr addr) override;
72 std::optional<u64> Read64(VAddr addr) override;
73
74 bool ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) override;
75
76 bool Write8(VAddr addr, u8 data) override;
77 bool Write16(VAddr addr, u16 data) override;
78 bool Write32(VAddr addr, u32 data) override;
79 bool Write64(VAddr addr, u64 data) override;
80
81 bool WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size) override;
82
83 std::unordered_map<VAddr, u8> data;
84 };
85
86 bool mutable_memory;
87 std::shared_ptr<TestMemory> test_memory;
88 std::vector<WriteRecord> write_records;
89 Common::PageTable* page_table = nullptr;
90 Kernel::KernelCore kernel;
91};
92
93} // namespace ArmTests
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 5b73724ce..f7b9d7f86 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -25,6 +25,7 @@ add_library(video_core STATIC
25 command_classes/vic.h 25 command_classes/vic.h
26 compatible_formats.cpp 26 compatible_formats.cpp
27 compatible_formats.h 27 compatible_formats.h
28 delayed_destruction_ring.h
28 dirty_flags.cpp 29 dirty_flags.cpp
29 dirty_flags.h 30 dirty_flags.h
30 dma_pusher.cpp 31 dma_pusher.cpp
@@ -47,6 +48,7 @@ add_library(video_core STATIC
47 engines/shader_bytecode.h 48 engines/shader_bytecode.h
48 engines/shader_header.h 49 engines/shader_header.h
49 engines/shader_type.h 50 engines/shader_type.h
51 framebuffer_config.h
50 macro/macro.cpp 52 macro/macro.cpp
51 macro/macro.h 53 macro/macro.h
52 macro/macro_hle.cpp 54 macro/macro_hle.cpp
@@ -58,10 +60,6 @@ add_library(video_core STATIC
58 fence_manager.h 60 fence_manager.h
59 gpu.cpp 61 gpu.cpp
60 gpu.h 62 gpu.h
61 gpu_asynch.cpp
62 gpu_asynch.h
63 gpu_synch.cpp
64 gpu_synch.h
65 gpu_thread.cpp 63 gpu_thread.cpp
66 gpu_thread.h 64 gpu_thread.h
67 guest_driver.cpp 65 guest_driver.cpp
@@ -84,14 +82,10 @@ add_library(video_core STATIC
84 renderer_opengl/gl_device.h 82 renderer_opengl/gl_device.h
85 renderer_opengl/gl_fence_manager.cpp 83 renderer_opengl/gl_fence_manager.cpp
86 renderer_opengl/gl_fence_manager.h 84 renderer_opengl/gl_fence_manager.h
87 renderer_opengl/gl_framebuffer_cache.cpp
88 renderer_opengl/gl_framebuffer_cache.h
89 renderer_opengl/gl_rasterizer.cpp 85 renderer_opengl/gl_rasterizer.cpp
90 renderer_opengl/gl_rasterizer.h 86 renderer_opengl/gl_rasterizer.h
91 renderer_opengl/gl_resource_manager.cpp 87 renderer_opengl/gl_resource_manager.cpp
92 renderer_opengl/gl_resource_manager.h 88 renderer_opengl/gl_resource_manager.h
93 renderer_opengl/gl_sampler_cache.cpp
94 renderer_opengl/gl_sampler_cache.h
95 renderer_opengl/gl_shader_cache.cpp 89 renderer_opengl/gl_shader_cache.cpp
96 renderer_opengl/gl_shader_cache.h 90 renderer_opengl/gl_shader_cache.h
97 renderer_opengl/gl_shader_decompiler.cpp 91 renderer_opengl/gl_shader_decompiler.cpp
@@ -113,14 +107,14 @@ add_library(video_core STATIC
113 renderer_opengl/maxwell_to_gl.h 107 renderer_opengl/maxwell_to_gl.h
114 renderer_opengl/renderer_opengl.cpp 108 renderer_opengl/renderer_opengl.cpp
115 renderer_opengl/renderer_opengl.h 109 renderer_opengl/renderer_opengl.h
116 renderer_opengl/utils.cpp 110 renderer_opengl/util_shaders.cpp
117 renderer_opengl/utils.h 111 renderer_opengl/util_shaders.h
112 renderer_vulkan/blit_image.cpp
113 renderer_vulkan/blit_image.h
118 renderer_vulkan/fixed_pipeline_state.cpp 114 renderer_vulkan/fixed_pipeline_state.cpp
119 renderer_vulkan/fixed_pipeline_state.h 115 renderer_vulkan/fixed_pipeline_state.h
120 renderer_vulkan/maxwell_to_vk.cpp 116 renderer_vulkan/maxwell_to_vk.cpp
121 renderer_vulkan/maxwell_to_vk.h 117 renderer_vulkan/maxwell_to_vk.h
122 renderer_vulkan/nsight_aftermath_tracker.cpp
123 renderer_vulkan/nsight_aftermath_tracker.h
124 renderer_vulkan/renderer_vulkan.h 118 renderer_vulkan/renderer_vulkan.h
125 renderer_vulkan/renderer_vulkan.cpp 119 renderer_vulkan/renderer_vulkan.cpp
126 renderer_vulkan/vk_blit_screen.cpp 120 renderer_vulkan/vk_blit_screen.cpp
@@ -135,14 +129,10 @@ add_library(video_core STATIC
135 renderer_vulkan/vk_compute_pipeline.h 129 renderer_vulkan/vk_compute_pipeline.h
136 renderer_vulkan/vk_descriptor_pool.cpp 130 renderer_vulkan/vk_descriptor_pool.cpp
137 renderer_vulkan/vk_descriptor_pool.h 131 renderer_vulkan/vk_descriptor_pool.h
138 renderer_vulkan/vk_device.cpp
139 renderer_vulkan/vk_device.h
140 renderer_vulkan/vk_fence_manager.cpp 132 renderer_vulkan/vk_fence_manager.cpp
141 renderer_vulkan/vk_fence_manager.h 133 renderer_vulkan/vk_fence_manager.h
142 renderer_vulkan/vk_graphics_pipeline.cpp 134 renderer_vulkan/vk_graphics_pipeline.cpp
143 renderer_vulkan/vk_graphics_pipeline.h 135 renderer_vulkan/vk_graphics_pipeline.h
144 renderer_vulkan/vk_image.cpp
145 renderer_vulkan/vk_image.h
146 renderer_vulkan/vk_master_semaphore.cpp 136 renderer_vulkan/vk_master_semaphore.cpp
147 renderer_vulkan/vk_master_semaphore.h 137 renderer_vulkan/vk_master_semaphore.h
148 renderer_vulkan/vk_memory_manager.cpp 138 renderer_vulkan/vk_memory_manager.cpp
@@ -153,12 +143,8 @@ add_library(video_core STATIC
153 renderer_vulkan/vk_query_cache.h 143 renderer_vulkan/vk_query_cache.h
154 renderer_vulkan/vk_rasterizer.cpp 144 renderer_vulkan/vk_rasterizer.cpp
155 renderer_vulkan/vk_rasterizer.h 145 renderer_vulkan/vk_rasterizer.h
156 renderer_vulkan/vk_renderpass_cache.cpp
157 renderer_vulkan/vk_renderpass_cache.h
158 renderer_vulkan/vk_resource_pool.cpp 146 renderer_vulkan/vk_resource_pool.cpp
159 renderer_vulkan/vk_resource_pool.h 147 renderer_vulkan/vk_resource_pool.h
160 renderer_vulkan/vk_sampler_cache.cpp
161 renderer_vulkan/vk_sampler_cache.h
162 renderer_vulkan/vk_scheduler.cpp 148 renderer_vulkan/vk_scheduler.cpp
163 renderer_vulkan/vk_scheduler.h 149 renderer_vulkan/vk_scheduler.h
164 renderer_vulkan/vk_shader_decompiler.cpp 150 renderer_vulkan/vk_shader_decompiler.cpp
@@ -177,10 +163,6 @@ add_library(video_core STATIC
177 renderer_vulkan/vk_texture_cache.h 163 renderer_vulkan/vk_texture_cache.h
178 renderer_vulkan/vk_update_descriptor.cpp 164 renderer_vulkan/vk_update_descriptor.cpp
179 renderer_vulkan/vk_update_descriptor.h 165 renderer_vulkan/vk_update_descriptor.h
180 renderer_vulkan/wrapper.cpp
181 renderer_vulkan/wrapper.h
182 sampler_cache.cpp
183 sampler_cache.h
184 shader_cache.h 166 shader_cache.h
185 shader_notify.cpp 167 shader_notify.cpp
186 shader_notify.h 168 shader_notify.h
@@ -237,25 +219,52 @@ add_library(video_core STATIC
237 shader/transform_feedback.h 219 shader/transform_feedback.h
238 surface.cpp 220 surface.cpp
239 surface.h 221 surface.h
222 texture_cache/accelerated_swizzle.cpp
223 texture_cache/accelerated_swizzle.h
224 texture_cache/decode_bc4.cpp
225 texture_cache/decode_bc4.h
226 texture_cache/descriptor_table.h
227 texture_cache/formatter.cpp
228 texture_cache/formatter.h
240 texture_cache/format_lookup_table.cpp 229 texture_cache/format_lookup_table.cpp
241 texture_cache/format_lookup_table.h 230 texture_cache/format_lookup_table.h
242 texture_cache/surface_base.cpp 231 texture_cache/image_base.cpp
243 texture_cache/surface_base.h 232 texture_cache/image_base.h
244 texture_cache/surface_params.cpp 233 texture_cache/image_info.cpp
245 texture_cache/surface_params.h 234 texture_cache/image_info.h
246 texture_cache/surface_view.cpp 235 texture_cache/image_view_base.cpp
247 texture_cache/surface_view.h 236 texture_cache/image_view_base.h
237 texture_cache/image_view_info.cpp
238 texture_cache/image_view_info.h
239 texture_cache/render_targets.h
240 texture_cache/samples_helper.h
241 texture_cache/slot_vector.h
248 texture_cache/texture_cache.h 242 texture_cache/texture_cache.h
243 texture_cache/types.h
244 texture_cache/util.cpp
245 texture_cache/util.h
249 textures/astc.cpp 246 textures/astc.cpp
250 textures/astc.h 247 textures/astc.h
251 textures/convert.cpp
252 textures/convert.h
253 textures/decoders.cpp 248 textures/decoders.cpp
254 textures/decoders.h 249 textures/decoders.h
255 textures/texture.cpp 250 textures/texture.cpp
256 textures/texture.h 251 textures/texture.h
257 video_core.cpp 252 video_core.cpp
258 video_core.h 253 video_core.h
254 vulkan_common/vulkan_debug_callback.cpp
255 vulkan_common/vulkan_debug_callback.h
256 vulkan_common/vulkan_device.cpp
257 vulkan_common/vulkan_device.h
258 vulkan_common/vulkan_instance.cpp
259 vulkan_common/vulkan_instance.h
260 vulkan_common/vulkan_library.cpp
261 vulkan_common/vulkan_library.h
262 vulkan_common/vulkan_surface.cpp
263 vulkan_common/vulkan_surface.h
264 vulkan_common/vulkan_wrapper.cpp
265 vulkan_common/vulkan_wrapper.h
266 vulkan_common/nsight_aftermath_tracker.cpp
267 vulkan_common/nsight_aftermath_tracker.h
259) 268)
260 269
261create_target_directory_groups(video_core) 270create_target_directory_groups(video_core)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 38961f3fd..83b9ee871 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -118,20 +118,17 @@ public:
118 /// Prepares the buffer cache for data uploading 118 /// Prepares the buffer cache for data uploading
119 /// @param max_size Maximum number of bytes that will be uploaded 119 /// @param max_size Maximum number of bytes that will be uploaded
120 /// @return True when a stream buffer invalidation was required, false otherwise 120 /// @return True when a stream buffer invalidation was required, false otherwise
121 bool Map(std::size_t max_size) { 121 void Map(std::size_t max_size) {
122 std::lock_guard lock{mutex}; 122 std::lock_guard lock{mutex};
123 123
124 bool invalidated; 124 std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
125 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
126 buffer_offset = buffer_offset_base; 125 buffer_offset = buffer_offset_base;
127
128 return invalidated;
129 } 126 }
130 127
131 /// Finishes the upload stream 128 /// Finishes the upload stream
132 void Unmap() { 129 void Unmap() {
133 std::lock_guard lock{mutex}; 130 std::lock_guard lock{mutex};
134 stream_buffer->Unmap(buffer_offset - buffer_offset_base); 131 stream_buffer.Unmap(buffer_offset - buffer_offset_base);
135 } 132 }
136 133
137 /// Function called at the end of each frame, intended for deferred operations 134 /// Function called at the end of each frame, intended for deferred operations
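
Map no longer reports stream-buffer invalidation, so callers simply bracket their uploads between Map and Unmap; a hedged usage sketch against the interface above (staging calls elided):

    buffer_cache.Map(max_upload_bytes); // reserve space in the shared stream buffer
    // ... stage vertex/index/uniform data through the cache ...
    buffer_cache.Unmap();               // publishes exactly the bytes advanced since Map

Dropping the invalidation flag matches the new ownership model below, where the stream buffer is held by reference rather than owned by the cache.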
@@ -261,9 +258,9 @@ public:
261protected: 258protected:
262 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 259 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
263 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 260 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
264 std::unique_ptr<StreamBuffer> stream_buffer_) 261 StreamBuffer& stream_buffer_)
265 : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, 262 : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
266 stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} 263 stream_buffer{stream_buffer_} {}
267 264
268 ~BufferCache() = default; 265 ~BufferCache() = default;
269 266
@@ -441,7 +438,7 @@ private:
441 438
442 buffer_ptr += size; 439 buffer_ptr += size;
443 buffer_offset += size; 440 buffer_offset += size;
444 return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()}; 441 return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
445 } 442 }
446 443
447 void AlignBuffer(std::size_t alignment) { 444 void AlignBuffer(std::size_t alignment) {
@@ -567,9 +564,7 @@ private:
567 VideoCore::RasterizerInterface& rasterizer; 564 VideoCore::RasterizerInterface& rasterizer;
568 Tegra::MemoryManager& gpu_memory; 565 Tegra::MemoryManager& gpu_memory;
569 Core::Memory::Memory& cpu_memory; 566 Core::Memory::Memory& cpu_memory;
570 567 StreamBuffer& stream_buffer;
571 std::unique_ptr<StreamBuffer> stream_buffer;
572 BufferType stream_buffer_handle;
573 568
574 u8* buffer_ptr = nullptr; 569 u8* buffer_ptr = nullptr;
575 u64 buffer_offset = 0; 570 u64 buffer_offset = 0;
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index e3e7432f7..94679d5d1 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
33 : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), 33 : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
34 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), 34 vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
35 host1x_processor(std::make_unique<Host1x>(gpu)), 35 host1x_processor(std::make_unique<Host1x>(gpu)),
36 nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)), 36 sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
37 vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
38 37
39CDmaPusher::~CDmaPusher() = default; 38CDmaPusher::~CDmaPusher() = default;
40 39
@@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
110 const auto syncpoint_id = static_cast<u32>(data & 0xFF); 109 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
111 const auto cond = static_cast<u32>((data >> 8) & 0xFF); 110 const auto cond = static_cast<u32>((data >> 8) & 0xFF);
112 if (cond == 0) { 111 if (cond == 0) {
113 nvdec_sync->Increment(syncpoint_id); 112 sync_manager->Increment(syncpoint_id);
114 } else { 113 } else {
115 nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); 114 sync_manager->SignalDone(
116 nvdec_sync->SignalDone(syncpoint_id); 115 sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
117 } 116 }
118 break; 117 break;
119 } 118 }
@@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
135 const auto syncpoint_id = static_cast<u32>(data & 0xFF); 134 const auto syncpoint_id = static_cast<u32>(data & 0xFF);
136 const auto cond = static_cast<u32>((data >> 8) & 0xFF); 135 const auto cond = static_cast<u32>((data >> 8) & 0xFF);
137 if (cond == 0) { 136 if (cond == 0) {
138 vic_sync->Increment(syncpoint_id); 137 sync_manager->Increment(syncpoint_id);
139 } else { 138 } else {
140 vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id); 139 sync_manager->SignalDone(
141 vic_sync->SignalDone(syncpoint_id); 140 sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
142 } 141 }
143 break; 142 break;
144 } 143 }
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 0db1cd646..8ca70b6dd 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -116,12 +116,10 @@ private:
116 void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments); 116 void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
117 117
118 GPU& gpu; 118 GPU& gpu;
119 119 std::shared_ptr<Tegra::Nvdec> nvdec_processor;
120 std::shared_ptr<Nvdec> nvdec_processor; 120 std::unique_ptr<Tegra::Vic> vic_processor;
121 std::unique_ptr<Vic> vic_processor; 121 std::unique_ptr<Tegra::Host1x> host1x_processor;
122 std::unique_ptr<Host1x> host1x_processor; 122 std::unique_ptr<SyncptIncrManager> sync_manager;
123 std::unique_ptr<SyncptIncrManager> nvdec_sync;
124 std::unique_ptr<SyncptIncrManager> vic_sync;
125 ChClassId current_class{}; 123 ChClassId current_class{};
126 ThiRegisters vic_thi_state{}; 124 ThiRegisters vic_thi_state{};
127 ThiRegisters nvdec_thi_state{}; 125 ThiRegisters nvdec_thi_state{};
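
The two per-engine SyncptIncrManager instances collapse into a single sync_manager, and the conditional path becomes one expression: the handle returned by IncrementWhenDone() feeds straight into SignalDone(). A sketch of that flow, assuming only the call shapes visible in this diff; the stand-in manager below is not the real Tegra::SyncptIncrManager.

#include <cstdint>
#include <vector>

class SyncManagerSketch {
public:
    void Increment(std::uint32_t syncpoint_id) {
        static_cast<void>(syncpoint_id); // the real manager bumps the syncpoint immediately
    }
    std::uint32_t IncrementWhenDone(std::uint32_t class_id, std::uint32_t syncpoint_id) {
        static_cast<void>(class_id); // the real manager records which class completes this
        pending.push_back(syncpoint_id);
        return static_cast<std::uint32_t>(pending.size() - 1); // handle to the pending increment
    }
    void SignalDone(std::uint32_t handle) {
        static_cast<void>(handle); // marks the pending increment identified by 'handle' complete
    }

private:
    std::vector<std::uint32_t> pending;
};

// Both the nvdec and vic increment methods now funnel through this one path:
void OnIncrementMethod(SyncManagerSketch& sync_manager, std::uint32_t data,
                       std::uint32_t current_class) {
    const std::uint32_t syncpoint_id = data & 0xFF;
    const std::uint32_t cond = (data >> 8) & 0xFF;
    if (cond == 0) {
        sync_manager.Increment(syncpoint_id);
    } else {
        sync_manager.SignalDone(
            sync_manager.IncrementWhenDone(current_class, syncpoint_id));
    }
}
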
diff --git a/src/video_core/command_classes/host1x.cpp b/src/video_core/command_classes/host1x.cpp
index c4dd4881a..b12494528 100644
--- a/src/video_core/command_classes/host1x.cpp
+++ b/src/video_core/command_classes/host1x.cpp
@@ -10,22 +10,14 @@ Tegra::Host1x::Host1x(GPU& gpu_) : gpu(gpu_) {}
10 10
11Tegra::Host1x::~Host1x() = default; 11Tegra::Host1x::~Host1x() = default;
12 12
13void Tegra::Host1x::StateWrite(u32 offset, u32 arguments) { 13void Tegra::Host1x::ProcessMethod(Method method, u32 argument) {
14 u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u32);
15 std::memcpy(state_offset, &arguments, sizeof(u32));
16}
17
18void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& arguments) {
19 StateWrite(static_cast<u32>(method), arguments[0]);
20 switch (method) { 14 switch (method) {
21 case Method::WaitSyncpt:
22 Execute(arguments[0]);
23 break;
24 case Method::LoadSyncptPayload32: 15 case Method::LoadSyncptPayload32:
25 syncpoint_value = arguments[0]; 16 syncpoint_value = argument;
26 break; 17 break;
18 case Method::WaitSyncpt:
27 case Method::WaitSyncpt32: 19 case Method::WaitSyncpt32:
28 Execute(arguments[0]); 20 Execute(argument);
29 break; 21 break;
30 default: 22 default:
31 UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method)); 23 UNIMPLEMENTED_MSG("Host1x method 0x{:X}", static_cast<u32>(method));
@@ -34,6 +26,5 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen
34} 26}
35 27
36void Tegra::Host1x::Execute(u32 data) { 28void Tegra::Host1x::Execute(u32 data) {
37 // This method waits on a valid syncpoint. 29 gpu.WaitFence(data, syncpoint_value);
38 // TODO: Implement when proper Async is in place
39} 30}
diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/command_classes/host1x.h
index 013eaa0c1..7e94799dd 100644
--- a/src/video_core/command_classes/host1x.h
+++ b/src/video_core/command_classes/host1x.h
@@ -14,64 +14,23 @@ class Nvdec;
14 14
15class Host1x { 15class Host1x {
16public: 16public:
17 struct Host1xClassRegisters {
18 u32 incr_syncpt{};
19 u32 incr_syncpt_ctrl{};
20 u32 incr_syncpt_error{};
21 INSERT_PADDING_WORDS(5);
22 u32 wait_syncpt{};
23 u32 wait_syncpt_base{};
24 u32 wait_syncpt_incr{};
25 u32 load_syncpt_base{};
26 u32 incr_syncpt_base{};
27 u32 clear{};
28 u32 wait{};
29 u32 wait_with_interrupt{};
30 u32 delay_use{};
31 u32 tick_count_high{};
32 u32 tick_count_low{};
33 u32 tick_ctrl{};
34 INSERT_PADDING_WORDS(23);
35 u32 ind_ctrl{};
36 u32 ind_off2{};
37 u32 ind_off{};
38 std::array<u32, 31> ind_data{};
39 INSERT_PADDING_WORDS(1);
40 u32 load_syncpoint_payload32{};
41 u32 stall_ctrl{};
42 u32 wait_syncpt32{};
43 u32 wait_syncpt_base32{};
44 u32 load_syncpt_base32{};
45 u32 incr_syncpt_base32{};
46 u32 stall_count_high{};
47 u32 stall_count_low{};
48 u32 xref_ctrl{};
49 u32 channel_xref_high{};
50 u32 channel_xref_low{};
51 };
52 static_assert(sizeof(Host1xClassRegisters) == 0x164, "Host1xClassRegisters is an invalid size");
53
54 enum class Method : u32 { 17 enum class Method : u32 {
55 WaitSyncpt = offsetof(Host1xClassRegisters, wait_syncpt) / 4, 18 WaitSyncpt = 0x8,
56 LoadSyncptPayload32 = offsetof(Host1xClassRegisters, load_syncpoint_payload32) / 4, 19 LoadSyncptPayload32 = 0x4e,
57 WaitSyncpt32 = offsetof(Host1xClassRegisters, wait_syncpt32) / 4, 20 WaitSyncpt32 = 0x50,
58 }; 21 };
59 22
60 explicit Host1x(GPU& gpu); 23 explicit Host1x(GPU& gpu);
61 ~Host1x(); 24 ~Host1x();
62 25
63 /// Writes the method into the state, Invoke Execute() if encountered 26 /// Writes the method into the state, Invoke Execute() if encountered
64 void ProcessMethod(Method method, const std::vector<u32>& arguments); 27 void ProcessMethod(Method method, u32 argument);
65 28
66private: 29private:
67 /// For Host1x, execute is waiting on a syncpoint previously written into the state 30 /// For Host1x, execute is waiting on a syncpoint previously written into the state
68 void Execute(u32 data); 31 void Execute(u32 data);
69 32
70 /// Write argument into the provided offset
71 void StateWrite(u32 offset, u32 arguments);
72
73 u32 syncpoint_value{}; 33 u32 syncpoint_value{};
74 Host1xClassRegisters state{};
75 GPU& gpu; 34 GPU& gpu;
76}; 35};
77 36
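
Host1x loses its 0x164-byte register mirror: methods now carry a single u32 argument, the enum values are the raw method offsets (0x8, 0x4e, 0x50) rather than offsetof() computations, and Execute() forwards to GPU::WaitFence instead of a TODO stub. A compilable sketch of the new dispatch, with FakeGpu standing in for Tegra::GPU:

#include <cstdint>

struct FakeGpu {
    void WaitFence(std::uint32_t syncpoint_id, std::uint32_t value) {
        static_cast<void>(syncpoint_id);
        static_cast<void>(value);
        // The real implementation blocks until the syncpoint reaches 'value'.
    }
};

class Host1xSketch {
public:
    enum class Method : std::uint32_t {
        WaitSyncpt = 0x8,
        LoadSyncptPayload32 = 0x4e,
        WaitSyncpt32 = 0x50,
    };

    explicit Host1xSketch(FakeGpu& gpu_) : gpu{gpu_} {}

    void ProcessMethod(Method method, std::uint32_t argument) {
        switch (method) {
        case Method::LoadSyncptPayload32:
            syncpoint_value = argument; // remember the payload for the next wait
            break;
        case Method::WaitSyncpt:
        case Method::WaitSyncpt32:
            gpu.WaitFence(argument, syncpoint_value); // argument is the syncpoint id
            break;
        }
    }

private:
    std::uint32_t syncpoint_value = 0;
    FakeGpu& gpu;
};
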
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 66e21ce9c..55e632346 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -9,7 +9,7 @@
9#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/gpu.h" 10#include "video_core/gpu.h"
11#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
12#include "video_core/texture_cache/surface_params.h" 12#include "video_core/textures/decoders.h"
13 13
14extern "C" { 14extern "C" {
15#include <libswscale/swscale.h> 15#include <libswscale/swscale.h>
@@ -53,7 +53,7 @@ void Vic::ProcessMethod(Method method, const std::vector<u32>& arguments) {
53 53
54void Vic::Execute() { 54void Vic::Execute() {
55 if (output_surface_luma_address == 0) { 55 if (output_surface_luma_address == 0) {
56 LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Recieved 0x{:X}", 56 LOG_ERROR(Service_NVDRV, "VIC Luma address not set. Received 0x{:X}",
57 vic_state.output_surface.luma_offset); 57 vic_state.output_surface.luma_offset);
58 return; 58 return;
59 } 59 }
@@ -105,9 +105,9 @@ void Vic::Execute() {
105 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1, 105 const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
106 block_height, 0); 106 block_height, 0);
107 std::vector<u8> swizzled_data(size); 107 std::vector<u8> swizzled_data(size);
108 Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4, 108 Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
109 swizzled_data.data(), converted_frame_buffer.get(), 109 frame->width, 4, swizzled_data.data(),
110 false, block_height, 0, 1); 110 converted_frame_buffer.get(), block_height, 0, 0);
111 111
112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); 112 gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
113 gpu.Maxwell3D().OnMemoryWrite(); 113 gpu.Maxwell3D().OnMemoryWrite();
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index b06c32c84..acf2668dc 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -3,33 +3,33 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array> 5#include <array>
6#include <bitset>
7#include <cstddef> 6#include <cstddef>
8 7
8#include "common/common_types.h"
9#include "video_core/compatible_formats.h" 9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h" 10#include "video_core/surface.h"
11 11
12namespace VideoCore::Surface { 12namespace VideoCore::Surface {
13
14namespace { 13namespace {
14using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
15 15
16// Compatibility table taken from Table 3.X.2 in: 16// Compatibility table taken from Table 3.X.2 in:
17// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt 17// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
18 18
19constexpr std::array VIEW_CLASS_128_BITS = { 19constexpr std::array VIEW_CLASS_128_BITS{
20 PixelFormat::R32G32B32A32_FLOAT, 20 PixelFormat::R32G32B32A32_FLOAT,
21 PixelFormat::R32G32B32A32_UINT, 21 PixelFormat::R32G32B32A32_UINT,
22 PixelFormat::R32G32B32A32_SINT, 22 PixelFormat::R32G32B32A32_SINT,
23}; 23};
24 24
25constexpr std::array VIEW_CLASS_96_BITS = { 25constexpr std::array VIEW_CLASS_96_BITS{
26 PixelFormat::R32G32B32_FLOAT, 26 PixelFormat::R32G32B32_FLOAT,
27}; 27};
28// Missing formats: 28// Missing formats:
29// PixelFormat::RGB32UI, 29// PixelFormat::RGB32UI,
30// PixelFormat::RGB32I, 30// PixelFormat::RGB32I,
31 31
32constexpr std::array VIEW_CLASS_64_BITS = { 32constexpr std::array VIEW_CLASS_64_BITS{
33 PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT, 33 PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
34 PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT, 34 PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
35 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, 35 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@@ -38,7 +38,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
38 38
39// TODO: How should we handle 48 bits? 39// TODO: How should we handle 48 bits?
40 40
41constexpr std::array VIEW_CLASS_32_BITS = { 41constexpr std::array VIEW_CLASS_32_BITS{
42 PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT, 42 PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
43 PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT, 43 PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
44 PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM, 44 PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
@@ -50,43 +50,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
50 50
51// TODO: How should we handle 24 bits? 51// TODO: How should we handle 24 bits?
52 52
53constexpr std::array VIEW_CLASS_16_BITS = { 53constexpr std::array VIEW_CLASS_16_BITS{
54 PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT, 54 PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
55 PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM, 55 PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
56 PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT, 56 PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
57}; 57};
58 58
59constexpr std::array VIEW_CLASS_8_BITS = { 59constexpr std::array VIEW_CLASS_8_BITS{
60 PixelFormat::R8_UINT, 60 PixelFormat::R8_UINT,
61 PixelFormat::R8_UNORM, 61 PixelFormat::R8_UNORM,
62 PixelFormat::R8_SINT, 62 PixelFormat::R8_SINT,
63 PixelFormat::R8_SNORM, 63 PixelFormat::R8_SNORM,
64}; 64};
65 65
66constexpr std::array VIEW_CLASS_RGTC1_RED = { 66constexpr std::array VIEW_CLASS_RGTC1_RED{
67 PixelFormat::BC4_UNORM, 67 PixelFormat::BC4_UNORM,
68 PixelFormat::BC4_SNORM, 68 PixelFormat::BC4_SNORM,
69}; 69};
70 70
71constexpr std::array VIEW_CLASS_RGTC2_RG = { 71constexpr std::array VIEW_CLASS_RGTC2_RG{
72 PixelFormat::BC5_UNORM, 72 PixelFormat::BC5_UNORM,
73 PixelFormat::BC5_SNORM, 73 PixelFormat::BC5_SNORM,
74}; 74};
75 75
76constexpr std::array VIEW_CLASS_BPTC_UNORM = { 76constexpr std::array VIEW_CLASS_BPTC_UNORM{
77 PixelFormat::BC7_UNORM, 77 PixelFormat::BC7_UNORM,
78 PixelFormat::BC7_SRGB, 78 PixelFormat::BC7_SRGB,
79}; 79};
80 80
81constexpr std::array VIEW_CLASS_BPTC_FLOAT = { 81constexpr std::array VIEW_CLASS_BPTC_FLOAT{
82 PixelFormat::BC6H_SFLOAT, 82 PixelFormat::BC6H_SFLOAT,
83 PixelFormat::BC6H_UFLOAT, 83 PixelFormat::BC6H_UFLOAT,
84}; 84};
85 85
86constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
87 PixelFormat::ASTC_2D_4X4_UNORM,
88 PixelFormat::ASTC_2D_4X4_SRGB,
89};
90
91constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
92 PixelFormat::ASTC_2D_5X4_UNORM,
93 PixelFormat::ASTC_2D_5X4_SRGB,
94};
95
96constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
97 PixelFormat::ASTC_2D_5X5_UNORM,
98 PixelFormat::ASTC_2D_5X5_SRGB,
99};
100
101constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
102 PixelFormat::ASTC_2D_6X5_UNORM,
103 PixelFormat::ASTC_2D_6X5_SRGB,
104};
105
106constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
107 PixelFormat::ASTC_2D_6X6_UNORM,
108 PixelFormat::ASTC_2D_6X6_SRGB,
109};
110
111constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
112 PixelFormat::ASTC_2D_8X5_UNORM,
113 PixelFormat::ASTC_2D_8X5_SRGB,
114};
115
116constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
117 PixelFormat::ASTC_2D_8X8_UNORM,
118 PixelFormat::ASTC_2D_8X8_SRGB,
119};
120
121// Missing formats:
122// PixelFormat::ASTC_2D_10X5_UNORM
123// PixelFormat::ASTC_2D_10X5_SRGB
124
125// Missing formats:
126// PixelFormat::ASTC_2D_10X6_UNORM
127// PixelFormat::ASTC_2D_10X6_SRGB
128
129constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
130 PixelFormat::ASTC_2D_10X8_UNORM,
131 PixelFormat::ASTC_2D_10X8_SRGB,
132};
133
134constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
135 PixelFormat::ASTC_2D_10X10_UNORM,
136 PixelFormat::ASTC_2D_10X10_SRGB,
137};
138
139// Missing formats
140// ASTC_2D_12X10_UNORM,
141// ASTC_2D_12X10_SRGB,
142
143constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
144 PixelFormat::ASTC_2D_12X12_UNORM,
145 PixelFormat::ASTC_2D_12X12_SRGB,
146};
147
86// Compatibility table taken from Table 4.X.1 in: 148// Compatibility table taken from Table 4.X.1 in:
87// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt 149// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
88 150
89constexpr std::array COPY_CLASS_128_BITS = { 151constexpr std::array COPY_CLASS_128_BITS{
90 PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT, 152 PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
91 PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM, 153 PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
92 PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM, 154 PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
@@ -97,7 +159,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
97// PixelFormat::RGBA32I 159// PixelFormat::RGBA32I
98// COMPRESSED_RG_RGTC2 160// COMPRESSED_RG_RGTC2
99 161
100constexpr std::array COPY_CLASS_64_BITS = { 162constexpr std::array COPY_CLASS_64_BITS{
101 PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT, 163 PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
102 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM, 164 PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
103 PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT, 165 PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
@@ -110,32 +172,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
110// COMPRESSED_RGBA_S3TC_DXT1_EXT 172// COMPRESSED_RGBA_S3TC_DXT1_EXT
111// COMPRESSED_SIGNED_RED_RGTC1 173// COMPRESSED_SIGNED_RED_RGTC1
112 174
113void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) { 175constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
114 compatiblity[format_a][format_b] = true; 176 table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
115 compatiblity[format_b][format_a] = true; 177 table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
116} 178}
117 179
118void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) { 180constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
119 Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b)); 181 Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
120} 182}
121 183
122template <typename Range> 184template <typename Range>
123void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) { 185constexpr void EnableRange(Table& table, const Range& range) {
124 for (auto it_a = range.begin(); it_a != range.end(); ++it_a) { 186 for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
125 for (auto it_b = it_a; it_b != range.end(); ++it_b) { 187 for (auto it_b = it_a; it_b != range.end(); ++it_b) {
126 Enable(compatibility, *it_a, *it_b); 188 Enable(table, *it_a, *it_b);
127 } 189 }
128 } 190 }
129} 191}
130 192
131} // Anonymous namespace 193constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
194 const size_t a = static_cast<size_t>(format_a);
195 const size_t b = static_cast<size_t>(format_b);
196 return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
197}
132 198
133FormatCompatibility::FormatCompatibility() { 199constexpr Table MakeViewTable() {
200 Table view{};
134 for (size_t i = 0; i < MaxPixelFormat; ++i) { 201 for (size_t i = 0; i < MaxPixelFormat; ++i) {
135 // Identity is allowed 202 // Identity is allowed
136 Enable(view, i, i); 203 Enable(view, i, i);
137 } 204 }
138
139 EnableRange(view, VIEW_CLASS_128_BITS); 205 EnableRange(view, VIEW_CLASS_128_BITS);
140 EnableRange(view, VIEW_CLASS_96_BITS); 206 EnableRange(view, VIEW_CLASS_96_BITS);
141 EnableRange(view, VIEW_CLASS_64_BITS); 207 EnableRange(view, VIEW_CLASS_64_BITS);
@@ -146,10 +212,39 @@ FormatCompatibility::FormatCompatibility() {
146 EnableRange(view, VIEW_CLASS_RGTC2_RG); 212 EnableRange(view, VIEW_CLASS_RGTC2_RG);
147 EnableRange(view, VIEW_CLASS_BPTC_UNORM); 213 EnableRange(view, VIEW_CLASS_BPTC_UNORM);
148 EnableRange(view, VIEW_CLASS_BPTC_FLOAT); 214 EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
215 EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
216 EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
217 EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
218 EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
219 EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
220 EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
221 EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
222 EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
223 EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
224 EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
225 return view;
226}
149 227
150 copy = view; 228constexpr Table MakeCopyTable() {
229 Table copy = MakeViewTable();
151 EnableRange(copy, COPY_CLASS_128_BITS); 230 EnableRange(copy, COPY_CLASS_128_BITS);
152 EnableRange(copy, COPY_CLASS_64_BITS); 231 EnableRange(copy, COPY_CLASS_64_BITS);
232 return copy;
233}
234} // Anonymous namespace
235
236bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views) {
237 if (broken_views) {
238 // If format views are broken, only accept formats that are identical.
239 return format_a == format_b;
240 }
241 static constexpr Table TABLE = MakeViewTable();
242 return IsSupported(TABLE, format_a, format_b);
243}
244
245bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
246 static constexpr Table TABLE = MakeCopyTable();
247 return IsSupported(TABLE, format_a, format_b);
153} 248}
154 249
155} // namespace VideoCore::Surface 250} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 51766349b..9a0522988 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -4,31 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <bitset>
9#include <cstddef>
10
11#include "video_core/surface.h" 7#include "video_core/surface.h"
12 8
13namespace VideoCore::Surface { 9namespace VideoCore::Surface {
14 10
15class FormatCompatibility { 11bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views);
16public:
17 using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
18
19 explicit FormatCompatibility();
20
21 bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
22 return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
23 }
24
25 bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
26 return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
27 }
28 12
29private: 13bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
30 Table view;
31 Table copy;
32};
33 14
34} // namespace VideoCore::Surface 15} // namespace VideoCore::Surface
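
The FormatCompatibility class and its std::bitset tables give way to free functions backed by compile-time tables of two u64 words per format; presumably the motivation is that std::bitset cannot be populated in a constant expression before C++23, while two words cover up to 128 formats. A self-contained demo of the bit-packing technique; the format count and the example view class are placeholders:

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t MaxFormat = 100; // must stay <= 128 for two u64 words
using Table = std::array<std::array<std::uint64_t, 2>, MaxFormat>;

constexpr void Enable(Table& table, std::size_t a, std::size_t b) {
    table[a][b / 64] |= std::uint64_t{1} << (b % 64);
    table[b][a / 64] |= std::uint64_t{1} << (a % 64); // compatibility is symmetric
}

constexpr bool IsSupported(const Table& table, std::size_t a, std::size_t b) {
    return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
}

constexpr Table MakeTable() {
    Table table{};
    for (std::size_t i = 0; i < MaxFormat; ++i) {
        Enable(table, i, i); // identity is always allowed
    }
    Enable(table, 3, 7); // a hypothetical two-format view class
    return table;
}

// The table is baked at compile time; a query is two loads, a shift, and a mask.
static_assert(IsSupported(MakeTable(), 7, 3));
static_assert(!IsSupported(MakeTable(), 0, 1));
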
diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h
new file mode 100644
index 000000000..4f1d29c04
--- /dev/null
+++ b/src/video_core/delayed_destruction_ring.h
@@ -0,0 +1,32 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <utility>
10#include <vector>
11
12namespace VideoCommon {
13
14/// Container to push objects to be destroyed a few ticks in the future
15template <typename T, size_t TICKS_TO_DESTROY>
16class DelayedDestructionRing {
17public:
18 void Tick() {
19 index = (index + 1) % TICKS_TO_DESTROY;
20 elements[index].clear();
21 }
22
23 void Push(T&& object) {
24 elements[index].push_back(std::move(object));
25 }
26
27private:
28 size_t index = 0;
29 std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
30};
31
32} // namespace VideoCommon
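
A usage sketch for the new ring, assuming the header above: objects pushed during a tick are destroyed TICKS_TO_DESTROY ticks later, once the GPU can no longer be referencing them. Texture here is a placeholder type.

#include <memory>

#include "video_core/delayed_destruction_ring.h"

struct Texture {}; // placeholder for a GPU object that must outlive in-flight work

void FrameLoop() {
    VideoCommon::DelayedDestructionRing<std::unique_ptr<Texture>, 3> ring;
    for (int frame = 0; frame < 10; ++frame) {
        ring.Push(std::make_unique<Texture>()); // defer destruction by three ticks
        ring.Tick(); // clears the slot holding objects pushed three ticks ago
    }
} // anything still queued is destroyed with the ring itself
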
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 2faa6ef0e..b1eaac00c 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -16,6 +16,9 @@ namespace VideoCommon::Dirty {
16using Tegra::Engines::Maxwell3D; 16using Tegra::Engines::Maxwell3D;
17 17
18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { 18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
19 FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
20 FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
21
19 static constexpr std::size_t num_per_rt = NUM(rt[0]); 22 static constexpr std::size_t num_per_rt = NUM(rt[0]);
20 static constexpr std::size_t begin = OFF(rt); 23 static constexpr std::size_t begin = OFF(rt);
21 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; 24 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
23 FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); 26 FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
24 } 27 }
25 FillBlock(tables[1], begin, num, RenderTargets); 28 FillBlock(tables[1], begin, num, RenderTargets);
29 FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
30
31 tables[0][OFF(rt_control)] = RenderTargets;
32 tables[1][OFF(rt_control)] = RenderTargetControl;
26 33
27 static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; 34 static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
28 for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { 35 for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 3f6c1d83a..875527ddd 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
16enum : u8 { 16enum : u8 {
17 NullEntry = 0, 17 NullEntry = 0,
18 18
19 Descriptors,
20
19 RenderTargets, 21 RenderTargets,
22 RenderTargetControl,
20 ColorBuffer0, 23 ColorBuffer0,
21 ColorBuffer1, 24 ColorBuffer1,
22 ColorBuffer2, 25 ColorBuffer2,
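
The new Descriptors flag makes writes to the tic/tsc register ranges observable, and rt_control now raises both RenderTargets and the new RenderTargetControl. For reference, the tables map one flag byte to each register index; FillBlock below is a sketch of the helper's effect, not its real definition:

#include <array>
#include <cstddef>
#include <cstdint>

using Table = std::array<std::uint8_t, 0x4000>; // one flag id per 3D register

constexpr void FillBlock(Table& table, std::size_t begin, std::size_t num, std::uint8_t flag) {
    for (std::size_t i = 0; i < num; ++i) {
        table[begin + i] = flag; // any write in [begin, begin + num) dirties 'flag'
    }
}
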
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 4293d676c..a01d334ad 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,11 @@
10 10
11namespace Tegra::Engines { 11namespace Tegra::Engines {
12 12
13Fermi2D::Fermi2D() = default; 13Fermi2D::Fermi2D() {
14 // Nvidia's OpenGL driver seems to assume these values
15 regs.src.depth = 1;
16 regs.dst.depth = 1;
17}
14 18
15Fermi2D::~Fermi2D() = default; 19Fermi2D::~Fermi2D() = default;
16 20
@@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
21void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 25void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
22 ASSERT_MSG(method < Regs::NUM_REGS, 26 ASSERT_MSG(method < Regs::NUM_REGS,
23 "Invalid Fermi2D register, increase the size of the Regs structure"); 27 "Invalid Fermi2D register, increase the size of the Regs structure");
24
25 regs.reg_array[method] = method_argument; 28 regs.reg_array[method] = method_argument;
26 29
27 switch (method) { 30 if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
28 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, 31 Blit();
29 // so trigger on the second 32-bit write.
30 case FERMI2D_REG_INDEX(blit_src_y) + 1: {
31 HandleSurfaceCopy();
32 break;
33 }
34 } 32 }
35} 33}
36 34
37void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { 35void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
38 for (std::size_t i = 0; i < amount; i++) { 36 for (u32 i = 0; i < amount; ++i) {
39 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); 37 CallMethod(method, base_start[i], methods_pending - i <= 1);
40 } 38 }
41} 39}
42 40
43static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) { 41void Fermi2D::Blit() {
44 const u32 line_a = src_2 - src_1; 42 LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
45 const u32 line_b = dst_2 - dst_1; 43 regs.src.Address(), regs.dst.Address());
46 const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
47 return {line_b - (excess * line_b) / line_a, excess};
48}
49
50void Fermi2D::HandleSurfaceCopy() {
51 LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
52 44
53 // TODO(Subv): Only raw copies are implemented. 45 UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
54 ASSERT(regs.operation == Operation::SrcCopy); 46 UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
47 UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
48 UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
49 UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
55 50
56 const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; 51 const auto& args = regs.pixels_from_memory;
57 const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; 52 const Config config{
58 u32 src_blit_x2, src_blit_y2;
59 if (regs.blit_control.origin == Origin::Corner) {
60 src_blit_x2 =
61 static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
62 src_blit_y2 =
63 static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
64 } else {
65 src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
66 src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
67 }
68 u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
69 u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
70 const auto [new_dst_w, src_excess_x] =
71 DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
72 const auto [new_dst_h, src_excess_y] =
73 DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
74 dst_blit_x2 = new_dst_w + regs.blit_dst_x;
75 src_blit_x2 = src_blit_x2 - src_excess_x;
76 dst_blit_y2 = new_dst_h + regs.blit_dst_y;
77 src_blit_y2 = src_blit_y2 - src_excess_y;
78 const auto [new_src_w, dst_excess_x] =
79 DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
80 const auto [new_src_h, dst_excess_y] =
81 DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
82 src_blit_x2 = new_src_w + src_blit_x1;
83 dst_blit_x2 = dst_blit_x2 - dst_excess_x;
84 src_blit_y2 = new_src_h + src_blit_y1;
85 dst_blit_y2 = dst_blit_y2 - dst_excess_y;
86 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
87 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
88 dst_blit_y2};
89 const Config copy_config{
90 .operation = regs.operation, 53 .operation = regs.operation,
91 .filter = regs.blit_control.filter, 54 .filter = args.sample_mode.filter,
92 .src_rect = src_rect, 55 .dst_x0 = args.dst_x0,
93 .dst_rect = dst_rect, 56 .dst_y0 = args.dst_y0,
57 .dst_x1 = args.dst_x0 + args.dst_width,
58 .dst_y1 = args.dst_y0 + args.dst_height,
59 .src_x0 = static_cast<s32>(args.src_x0 >> 32),
60 .src_y0 = static_cast<s32>(args.src_y0 >> 32),
61 .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
62 .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
94 }; 63 };
95 if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { 64 if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
96 UNIMPLEMENTED(); 65 UNIMPLEMENTED();
97 } 66 }
98} 67}
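
The rewritten Blit() reads the pixels_from_memory registers directly: destination bounds are plain integers, while the source start and the per-pixel deltas are 32.32 fixed point, so the end coordinates come from a 64-bit multiply-accumulate shifted down by 32. A worked example with made-up values:

#include <cstdint>

constexpr std::int64_t ToFixed(double value) {
    return static_cast<std::int64_t>(value * (1LL << 32)); // 32.32 fixed point
}

constexpr std::int32_t SrcX1(std::int64_t du_dx, std::int32_t dst_width, std::int64_t src_x0) {
    return static_cast<std::int32_t>((du_dx * dst_width + src_x0) >> 32);
}

// A 2x horizontal upscale: du_dx = 0.5, 64 destination pixels, source starting
// at 8.0, so the source rectangle ends at 8 + 0.5 * 64 = 40.
static_assert(SrcX1(ToFixed(0.5), 64, ToFixed(8.0)) == 40);
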
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0909709ec..81522988e 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -53,8 +53,8 @@ public:
53 }; 53 };
54 54
55 enum class Filter : u32 { 55 enum class Filter : u32 {
56 PointSample = 0, // Nearest 56 Point = 0,
57 Linear = 1, 57 Bilinear = 1,
58 }; 58 };
59 59
60 enum class Operation : u32 { 60 enum class Operation : u32 {
@@ -67,88 +67,235 @@ public:
67 BlendPremult = 6, 67 BlendPremult = 6,
68 }; 68 };
69 69
70 struct Regs { 70 enum class MemoryLayout : u32 {
71 static constexpr std::size_t NUM_REGS = 0x258; 71 BlockLinear = 0,
72 Pitch = 1,
73 };
72 74
73 struct Surface { 75 enum class CpuIndexWrap : u32 {
74 RenderTargetFormat format; 76 Wrap = 0,
75 BitField<0, 1, u32> linear; 77 NoWrap = 1,
76 union { 78 };
77 BitField<0, 4, u32> block_width;
78 BitField<4, 4, u32> block_height;
79 BitField<8, 4, u32> block_depth;
80 };
81 u32 depth;
82 u32 layer;
83 u32 pitch;
84 u32 width;
85 u32 height;
86 u32 address_high;
87 u32 address_low;
88
89 GPUVAddr Address() const {
90 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
91 address_low);
92 }
93
94 u32 BlockWidth() const {
95 return block_width.Value();
96 }
97
98 u32 BlockHeight() const {
99 return block_height.Value();
100 }
101
102 u32 BlockDepth() const {
103 return block_depth.Value();
104 }
105 };
106 static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
107 79
80 struct Surface {
81 RenderTargetFormat format;
82 MemoryLayout linear;
108 union { 83 union {
109 struct { 84 BitField<0, 4, u32> block_width;
110 INSERT_UNION_PADDING_WORDS(0x80); 85 BitField<4, 4, u32> block_height;
86 BitField<8, 4, u32> block_depth;
87 };
88 u32 depth;
89 u32 layer;
90 u32 pitch;
91 u32 width;
92 u32 height;
93 u32 addr_upper;
94 u32 addr_lower;
95
96 [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
97 return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
98 }
99 };
100 static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
111 101
112 Surface dst; 102 enum class SectorPromotion : u32 {
103 NoPromotion = 0,
104 PromoteTo2V = 1,
105 PromoteTo2H = 2,
106 PromoteTo4 = 3,
107 };
108
109 enum class NumTpcs : u32 {
110 All = 0,
111 One = 1,
112 };
113 113
114 INSERT_UNION_PADDING_WORDS(2); 114 enum class RenderEnableMode : u32 {
115 False = 0,
116 True = 1,
117 Conditional = 2,
118 RenderIfEqual = 3,
119 RenderIfNotEqual = 4,
120 };
115 121
116 Surface src; 122 enum class ColorKeyFormat : u32 {
123 A16R56G6B5 = 0,
124 A1R5G55B5 = 1,
125 A8R8G8B8 = 2,
126 A2R10G10B10 = 3,
127 Y8 = 4,
128 Y16 = 5,
129 Y32 = 6,
130 };
117 131
118 INSERT_UNION_PADDING_WORDS(0x15); 132 union Beta4 {
133 BitField<0, 8, u32> b;
134 BitField<8, 8, u32> g;
135 BitField<16, 8, u32> r;
136 BitField<24, 8, u32> a;
137 };
119 138
120 Operation operation; 139 struct Point {
140 u32 x;
141 u32 y;
142 };
121 143
122 INSERT_UNION_PADDING_WORDS(0x177); 144 enum class PatternSelect : u32 {
145 MonoChrome8x8 = 0,
146 MonoChrome64x1 = 1,
147 MonoChrome1x64 = 2,
148 Color = 3,
149 };
123 150
151 enum class NotifyType : u32 {
152 WriteOnly = 0,
153 WriteThenAwaken = 1,
154 };
155
156 enum class MonochromePatternColorFormat : u32 {
157 A8X8R8G6B5 = 0,
158 A1R5G5B5 = 1,
159 A8R8G8B8 = 2,
160 A8Y8 = 3,
161 A8X8Y16 = 4,
162 Y32 = 5,
163 };
164
165 enum class MonochromePatternFormat : u32 {
166 CGA6_M1 = 0,
167 LE_M1 = 1,
168 };
169
170 union Regs {
171 static constexpr std::size_t NUM_REGS = 0x258;
172 struct {
173 u32 object;
174 INSERT_UNION_PADDING_WORDS(0x3F);
175 u32 no_operation;
176 NotifyType notify;
177 INSERT_UNION_PADDING_WORDS(0x2);
178 u32 wait_for_idle;
179 INSERT_UNION_PADDING_WORDS(0xB);
180 u32 pm_trigger;
181 INSERT_UNION_PADDING_WORDS(0xF);
182 u32 context_dma_notify;
183 u32 dst_context_dma;
184 u32 src_context_dma;
185 u32 semaphore_context_dma;
186 INSERT_UNION_PADDING_WORDS(0x1C);
187 Surface dst;
188 CpuIndexWrap pixels_from_cpu_index_wrap;
189 u32 kind2d_check_enable;
190 Surface src;
191 SectorPromotion pixels_from_memory_sector_promotion;
192 INSERT_UNION_PADDING_WORDS(0x1);
193 NumTpcs num_tpcs;
194 u32 render_enable_addr_upper;
195 u32 render_enable_addr_lower;
196 RenderEnableMode render_enable_mode;
197 INSERT_UNION_PADDING_WORDS(0x4);
198 u32 clip_x0;
199 u32 clip_y0;
200 u32 clip_width;
201 u32 clip_height;
202 BitField<0, 1, u32> clip_enable;
203 BitField<0, 3, ColorKeyFormat> color_key_format;
204 u32 color_key;
205 BitField<0, 1, u32> color_key_enable;
206 BitField<0, 8, u32> rop;
207 u32 beta1;
208 Beta4 beta4;
209 Operation operation;
210 union {
211 BitField<0, 6, u32> x;
212 BitField<8, 6, u32> y;
213 } pattern_offset;
214 BitField<0, 2, PatternSelect> pattern_select;
215 INSERT_UNION_PADDING_WORDS(0xC);
216 struct {
217 BitField<0, 3, MonochromePatternColorFormat> color_format;
218 BitField<0, 1, MonochromePatternFormat> format;
219 u32 color0;
220 u32 color1;
221 u32 pattern0;
222 u32 pattern1;
223 } monochrome_pattern;
224 struct {
225 std::array<u32, 0x40> X8R8G8B8;
226 std::array<u32, 0x20> R5G6B5;
227 std::array<u32, 0x20> X1R5G5B5;
228 std::array<u32, 0x10> Y8;
229 } color_pattern;
230 INSERT_UNION_PADDING_WORDS(0x10);
231 struct {
232 u32 prim_mode;
233 u32 prim_color_format;
234 u32 prim_color;
235 u32 line_tie_break_bits;
236 INSERT_UNION_PADDING_WORDS(0x14);
237 u32 prim_point_xy;
238 INSERT_UNION_PADDING_WORDS(0x7);
239 std::array<Point, 0x40> prim_point;
240 } render_solid;
241 struct {
242 u32 data_type;
243 u32 color_format;
244 u32 index_format;
245 u32 mono_format;
246 u32 wrap;
247 u32 color0;
248 u32 color1;
249 u32 mono_opacity;
250 INSERT_UNION_PADDING_WORDS(0x6);
251 u32 src_width;
252 u32 src_height;
253 u32 dx_du_frac;
254 u32 dx_du_int;
255 u32 dx_dv_frac;
256 u32 dy_dv_int;
257 u32 dst_x0_frac;
258 u32 dst_x0_int;
259 u32 dst_y0_frac;
260 u32 dst_y0_int;
261 u32 data;
262 } pixels_from_cpu;
263 INSERT_UNION_PADDING_WORDS(0x3);
264 u32 big_endian_control;
265 INSERT_UNION_PADDING_WORDS(0x3);
266 struct {
267 BitField<0, 3, u32> block_shape;
268 BitField<0, 5, u32> corral_size;
269 BitField<0, 1, u32> safe_overlap;
124 union { 270 union {
125 u32 raw;
126 BitField<0, 1, Origin> origin; 271 BitField<0, 1, Origin> origin;
127 BitField<4, 1, Filter> filter; 272 BitField<4, 1, Filter> filter;
128 } blit_control; 273 } sample_mode;
129
130 INSERT_UNION_PADDING_WORDS(0x8); 274 INSERT_UNION_PADDING_WORDS(0x8);
131 275 s32 dst_x0;
132 u32 blit_dst_x; 276 s32 dst_y0;
133 u32 blit_dst_y; 277 s32 dst_width;
134 u32 blit_dst_width; 278 s32 dst_height;
135 u32 blit_dst_height; 279 s64 du_dx;
136 u64 blit_du_dx; 280 s64 dv_dy;
137 u64 blit_dv_dy; 281 s64 src_x0;
138 u64 blit_src_x; 282 s64 src_y0;
139 u64 blit_src_y; 283 } pixels_from_memory;
140
141 INSERT_UNION_PADDING_WORDS(0x21);
142 };
143 std::array<u32, NUM_REGS> reg_array;
144 }; 284 };
285 std::array<u32, NUM_REGS> reg_array;
145 } regs{}; 286 } regs{};
146 287
147 struct Config { 288 struct Config {
148 Operation operation{}; 289 Operation operation;
149 Filter filter{}; 290 Filter filter;
150 Common::Rectangle<u32> src_rect; 291 s32 dst_x0;
151 Common::Rectangle<u32> dst_rect; 292 s32 dst_y0;
293 s32 dst_x1;
294 s32 dst_y1;
295 s32 src_x0;
296 s32 src_y0;
297 s32 src_x1;
298 s32 src_y1;
152 }; 299 };
153 300
154private: 301private:
@@ -156,25 +303,49 @@ private:
156 303
157 /// Performs the copy from the source surface to the destination surface as configured in the 304 /// Performs the copy from the source surface to the destination surface as configured in the
158 /// registers. 305 /// registers.
159 void HandleSurfaceCopy(); 306 void Blit();
160}; 307};
161 308
162#define ASSERT_REG_POSITION(field_name, position) \ 309#define ASSERT_REG_POSITION(field_name, position) \
163 static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ 310 static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
164 "Field " #field_name " has invalid position") 311 "Field " #field_name " has invalid position")
165 312
166ASSERT_REG_POSITION(dst, 0x80); 313ASSERT_REG_POSITION(object, 0x0);
167ASSERT_REG_POSITION(src, 0x8C); 314ASSERT_REG_POSITION(no_operation, 0x100);
168ASSERT_REG_POSITION(operation, 0xAB); 315ASSERT_REG_POSITION(notify, 0x104);
169ASSERT_REG_POSITION(blit_control, 0x223); 316ASSERT_REG_POSITION(wait_for_idle, 0x110);
170ASSERT_REG_POSITION(blit_dst_x, 0x22c); 317ASSERT_REG_POSITION(pm_trigger, 0x140);
171ASSERT_REG_POSITION(blit_dst_y, 0x22d); 318ASSERT_REG_POSITION(context_dma_notify, 0x180);
172ASSERT_REG_POSITION(blit_dst_width, 0x22e); 319ASSERT_REG_POSITION(dst_context_dma, 0x184);
173ASSERT_REG_POSITION(blit_dst_height, 0x22f); 320ASSERT_REG_POSITION(src_context_dma, 0x188);
174ASSERT_REG_POSITION(blit_du_dx, 0x230); 321ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
175ASSERT_REG_POSITION(blit_dv_dy, 0x232); 322ASSERT_REG_POSITION(dst, 0x200);
176ASSERT_REG_POSITION(blit_src_x, 0x234); 323ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
177ASSERT_REG_POSITION(blit_src_y, 0x236); 324ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
325ASSERT_REG_POSITION(src, 0x230);
326ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
327ASSERT_REG_POSITION(num_tpcs, 0x260);
328ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
329ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
330ASSERT_REG_POSITION(clip_x0, 0x280);
331ASSERT_REG_POSITION(clip_y0, 0x284);
332ASSERT_REG_POSITION(clip_width, 0x288);
333ASSERT_REG_POSITION(clip_height, 0x28c);
334ASSERT_REG_POSITION(clip_enable, 0x290);
335ASSERT_REG_POSITION(color_key_format, 0x294);
336ASSERT_REG_POSITION(color_key, 0x298);
337ASSERT_REG_POSITION(rop, 0x2A0);
338ASSERT_REG_POSITION(beta1, 0x2A4);
339ASSERT_REG_POSITION(beta4, 0x2A8);
340ASSERT_REG_POSITION(operation, 0x2AC);
341ASSERT_REG_POSITION(pattern_offset, 0x2B0);
342ASSERT_REG_POSITION(pattern_select, 0x2B4);
343ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
344ASSERT_REG_POSITION(color_pattern, 0x300);
345ASSERT_REG_POSITION(render_solid, 0x580);
346ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
347ASSERT_REG_POSITION(big_endian_control, 0x870);
348ASSERT_REG_POSITION(pixels_from_memory, 0x880);
178 349
179#undef ASSERT_REG_POSITION 350#undef ASSERT_REG_POSITION
180 351
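
Note the ASSERT_REG_POSITION change: positions are now byte offsets compared against offsetof() directly, instead of word indices multiplied by four, matching how the register list reads in nvidia's class headers. A toy illustration with a placeholder struct:

#include <cstddef>
#include <cstdint>

struct RegsSketch {
    std::uint32_t object;        // byte offset 0x0
    std::uint32_t padding[0x7F];
    std::uint32_t dst;           // 0x80 words in = byte offset 0x200
};
static_assert(offsetof(RegsSketch, dst) == 0x200, "positions are byte offsets now");
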
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 898370739..ba387506e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
58 } 58 }
59} 59}
60 60
61Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
62 const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
63 ASSERT(cbuf_mask[regs.tex_cb_index]);
64
65 const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
66 ASSERT(texinfo.Address() != 0);
67
68 const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
69 ASSERT(address < texinfo.Address() + texinfo.size);
70
71 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
72 return GetTextureInfo(tex_handle);
73}
74
75Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
76 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
77}
78
79u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { 61u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
80 ASSERT(stage == ShaderType::Compute); 62 ASSERT(stage == ShaderType::Compute);
81 const auto& buffer = launch_description.const_buffer_config[const_buffer]; 63 const auto& buffer = launch_description.const_buffer_config[const_buffer];
@@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
98 80
99SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { 81SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
100 const Texture::TextureHandle tex_handle{handle}; 82 const Texture::TextureHandle tex_handle{handle};
101 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 83 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
102 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 84 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
103 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 85
86 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
87 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
104 return result; 88 return result;
105} 89}
106 90
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7f2500aab..51a041202 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -209,11 +209,6 @@ public:
209 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 209 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
210 u32 methods_pending) override; 210 u32 methods_pending) override;
211 211
212 Texture::FullTextureInfo GetTexture(std::size_t offset) const;
213
214 /// Given a texture handle, returns the TSC and TIC entries.
215 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
216
217 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; 212 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
218 213
219 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; 214 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
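
With GetTexture()/GetTextureInfo() removed, sampler queries split the handle themselves and read the TIC and TSC tables independently. A texture handle packs both indices into one word; the 20/12 field split below is an assumption about Texture::TextureHandle, not something this diff states:

#include <cstdint>

struct TextureHandleSketch {
    std::uint32_t raw;
    std::uint32_t TicId() const { return raw & 0xFFFFF; }       // image descriptor index
    std::uint32_t TscId() const { return (raw >> 20) & 0xFFF; } // sampler descriptor index
};
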
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 761962ed0..9be651e24 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cinttypes>
6#include <cstring> 5#include <cstring>
7#include <optional> 6#include <optional>
8#include "common/assert.h" 7#include "common/assert.h"
@@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
227 OnMemoryWrite(); 226 OnMemoryWrite();
228 } 227 }
229 return; 228 return;
229 case MAXWELL3D_REG_INDEX(fragment_barrier):
230 return rasterizer->FragmentBarrier();
231 case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
232 return rasterizer->TiledCacheBarrier();
230 } 233 }
231} 234}
232 235
@@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() {
639} 642}
640 643
641Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 644Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
642 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; 645 const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
643 646
644 Texture::TICEntry tic_entry; 647 Texture::TICEntry tic_entry;
645 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 648 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
648} 651}
649 652
650Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 653Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
651 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; 654 const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
652 655
653 Texture::TSCEntry tsc_entry; 656 Texture::TSCEntry tsc_entry;
654 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); 657 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
655 return tsc_entry; 658 return tsc_entry;
656} 659}
657 660
658Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
659 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
660}
661
662Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
663 const auto stage_index = static_cast<std::size_t>(stage);
664 const auto& shader = state.shader_stages[stage_index];
665 const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
666 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
667
668 const GPUVAddr tex_info_address =
669 tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
670
671 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
672
673 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
674
675 return GetTextureInfo(tex_handle);
676}
677
678u32 Maxwell3D::GetRegisterValue(u32 method) const { 661u32 Maxwell3D::GetRegisterValue(u32 method) const {
679 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); 662 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
680 return regs.reg_array[method]; 663 return regs.reg_array[method];
681} 664}
682 665
683void Maxwell3D::ProcessClearBuffers() { 666void Maxwell3D::ProcessClearBuffers() {
684 ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
685 regs.clear_buffers.R == regs.clear_buffers.B &&
686 regs.clear_buffers.R == regs.clear_buffers.A);
687
688 rasterizer->Clear(); 667 rasterizer->Clear();
689} 668}
690 669
@@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
692 ASSERT(stage != ShaderType::Compute); 671 ASSERT(stage != ShaderType::Compute);
693 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; 672 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
694 const auto& buffer = shader_stage.const_buffers[const_buffer]; 673 const auto& buffer = shader_stage.const_buffers[const_buffer];
695 u32 result; 674 return memory_manager.Read<u32>(buffer.address + offset);
696 std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
697 return result;
698} 675}
699 676
700SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { 677SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
@@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
712 689
713SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { 690SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
714 const Texture::TextureHandle tex_handle{handle}; 691 const Texture::TextureHandle tex_handle{handle};
715 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 692 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
716 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 693 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
717 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 694
695 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
696 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
718 return result; 697 return result;
719} 698}
720 699
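
Beyond dropping the FullTextureInfo helpers, two quiet cleanups land here: the tic/tsc register blocks gain a uniform Address() accessor, and the const-buffer read goes through memory_manager.Read<u32>() instead of memcpy from a raw GetPointer(), which presumably keeps the read correct when the GPU address is not backed by contiguous host memory. Descriptor fetches stay simple base-plus-index arithmetic; the 0x20-byte entry size is an assumption about Texture::TICEntry/TSCEntry:

#include <cstddef>
#include <cstdint>

using GPUVAddr = std::uint64_t;

// Mirrors regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)
constexpr GPUVAddr EntryAddress(GPUVAddr base, std::uint32_t index, std::size_t entry_size) {
    return base + static_cast<GPUVAddr>(index) * entry_size;
}

static_assert(EntryAddress(0x1000, 3, 0x20) == 0x1060);
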
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 564acbc53..bf9e07c9b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -438,16 +438,6 @@ public:
438 DecrWrapOGL = 0x8508, 438 DecrWrapOGL = 0x8508,
439 }; 439 };
440 440
441 enum class MemoryLayout : u32 {
442 Linear = 0,
443 BlockLinear = 1,
444 };
445
446 enum class InvMemoryLayout : u32 {
447 BlockLinear = 0,
448 Linear = 1,
449 };
450
451 enum class CounterReset : u32 { 441 enum class CounterReset : u32 {
452 SampleCnt = 0x01, 442 SampleCnt = 0x01,
453 Unk02 = 0x02, 443 Unk02 = 0x02,
@@ -589,21 +579,31 @@ public:
589 NegativeW = 7, 579 NegativeW = 7,
590 }; 580 };
591 581
582 enum class SamplerIndex : u32 {
583 Independently = 0,
584 ViaHeaderIndex = 1,
585 };
586
587 struct TileMode {
588 union {
589 BitField<0, 4, u32> block_width;
590 BitField<4, 4, u32> block_height;
591 BitField<8, 4, u32> block_depth;
592 BitField<12, 1, u32> is_pitch_linear;
593 BitField<16, 1, u32> is_3d;
594 };
595 };
596 static_assert(sizeof(TileMode) == 4);
597
592 struct RenderTargetConfig { 598 struct RenderTargetConfig {
593 u32 address_high; 599 u32 address_high;
594 u32 address_low; 600 u32 address_low;
595 u32 width; 601 u32 width;
596 u32 height; 602 u32 height;
597 Tegra::RenderTargetFormat format; 603 Tegra::RenderTargetFormat format;
604 TileMode tile_mode;
598 union { 605 union {
599 BitField<0, 3, u32> block_width; 606 BitField<0, 16, u32> depth;
600 BitField<4, 3, u32> block_height;
601 BitField<8, 3, u32> block_depth;
602 BitField<12, 1, InvMemoryLayout> type;
603 BitField<16, 1, u32> is_3d;
604 } memory_layout;
605 union {
606 BitField<0, 16, u32> layers;
607 BitField<16, 1, u32> volume; 607 BitField<16, 1, u32> volume;
608 }; 608 };
609 u32 layer_stride; 609 u32 layer_stride;
@@ -832,7 +832,11 @@ public:
832 832
833 u32 patch_vertices; 833 u32 patch_vertices;
834 834
835 INSERT_UNION_PADDING_WORDS(0xC); 835 INSERT_UNION_PADDING_WORDS(0x4);
836
837 u32 fragment_barrier;
838
839 INSERT_UNION_PADDING_WORDS(0x7);
836 840
837 std::array<ScissorTest, NumViewports> scissor_test; 841 std::array<ScissorTest, NumViewports> scissor_test;
838 842
@@ -842,7 +846,15 @@ public:
842 u32 stencil_back_mask; 846 u32 stencil_back_mask;
843 u32 stencil_back_func_mask; 847 u32 stencil_back_func_mask;
844 848
845 INSERT_UNION_PADDING_WORDS(0xC); 849 INSERT_UNION_PADDING_WORDS(0x5);
850
851 u32 invalidate_texture_data_cache;
852
853 INSERT_UNION_PADDING_WORDS(0x1);
854
855 u32 tiled_cache_barrier;
856
857 INSERT_UNION_PADDING_WORDS(0x4);
846 858
847 u32 color_mask_common; 859 u32 color_mask_common;
848 860
@@ -866,12 +878,7 @@ public:
866 u32 address_high; 878 u32 address_high;
867 u32 address_low; 879 u32 address_low;
868 Tegra::DepthFormat format; 880 Tegra::DepthFormat format;
869 union { 881 TileMode tile_mode;
870 BitField<0, 4, u32> block_width;
871 BitField<4, 4, u32> block_height;
872 BitField<8, 4, u32> block_depth;
873 BitField<20, 1, InvMemoryLayout> type;
874 } memory_layout;
875 u32 layer_stride; 882 u32 layer_stride;
876 883
877 GPUVAddr Address() const { 884 GPUVAddr Address() const {
@@ -880,7 +887,18 @@ public:
880 } 887 }
881 } zeta; 888 } zeta;
882 889
883 INSERT_UNION_PADDING_WORDS(0x41); 890 struct {
891 union {
892 BitField<0, 16, u32> x;
893 BitField<16, 16, u32> width;
894 };
895 union {
896 BitField<0, 16, u32> y;
897 BitField<16, 16, u32> height;
898 };
899 } render_area;
900
901 INSERT_UNION_PADDING_WORDS(0x3F);
884 902
885 union { 903 union {
886 BitField<0, 4, u32> stencil; 904 BitField<0, 4, u32> stencil;
@@ -921,7 +939,7 @@ public:
921 BitField<25, 3, u32> map_7; 939 BitField<25, 3, u32> map_7;
922 }; 940 };
923 941
924 u32 GetMap(std::size_t index) const { 942 u32 Map(std::size_t index) const {
925 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, 943 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
926 map_4, map_5, map_6, map_7}; 944 map_4, map_5, map_6, map_7};
927 ASSERT(index < maps.size()); 945 ASSERT(index < maps.size());
@@ -934,11 +952,13 @@ public:
934 u32 zeta_width; 952 u32 zeta_width;
935 u32 zeta_height; 953 u32 zeta_height;
936 union { 954 union {
937 BitField<0, 16, u32> zeta_layers; 955 BitField<0, 16, u32> zeta_depth;
938 BitField<16, 1, u32> zeta_volume; 956 BitField<16, 1, u32> zeta_volume;
939 }; 957 };
940 958
941 INSERT_UNION_PADDING_WORDS(0x26); 959 SamplerIndex sampler_index;
960
961 INSERT_UNION_PADDING_WORDS(0x25);
942 962
943 u32 depth_test_enable; 963 u32 depth_test_enable;
944 964
@@ -964,6 +984,7 @@ public:
964 float b; 984 float b;
965 float a; 985 float a;
966 } blend_color; 986 } blend_color;
987
967 INSERT_UNION_PADDING_WORDS(0x4); 988 INSERT_UNION_PADDING_WORDS(0x4);
968 989
969 struct { 990 struct {
@@ -1001,7 +1022,12 @@ public:
1001 float line_width_smooth; 1022 float line_width_smooth;
1002 float line_width_aliased; 1023 float line_width_aliased;
1003 1024
1004 INSERT_UNION_PADDING_WORDS(0x1F); 1025 INSERT_UNION_PADDING_WORDS(0x1B);
1026
1027 u32 invalidate_sampler_cache_no_wfi;
1028 u32 invalidate_texture_header_cache_no_wfi;
1029
1030 INSERT_UNION_PADDING_WORDS(0x2);
1005 1031
1006 u32 vb_element_base; 1032 u32 vb_element_base;
1007 u32 vb_base_instance; 1033 u32 vb_base_instance;
@@ -1045,13 +1071,13 @@ public:
1045 } condition; 1071 } condition;
1046 1072
1047 struct { 1073 struct {
1048 u32 tsc_address_high; 1074 u32 address_high;
1049 u32 tsc_address_low; 1075 u32 address_low;
1050 u32 tsc_limit; 1076 u32 limit;
1051 1077
1052 GPUVAddr TSCAddress() const { 1078 GPUVAddr Address() const {
1053 return static_cast<GPUVAddr>( 1079 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
1054 (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low); 1080 address_low);
1055 } 1081 }
1056 } tsc; 1082 } tsc;
1057 1083
@@ -1062,13 +1088,13 @@ public:
1062 u32 line_smooth_enable; 1088 u32 line_smooth_enable;
1063 1089
1064 struct { 1090 struct {
1065 u32 tic_address_high; 1091 u32 address_high;
1066 u32 tic_address_low; 1092 u32 address_low;
1067 u32 tic_limit; 1093 u32 limit;
1068 1094
1069 GPUVAddr TICAddress() const { 1095 GPUVAddr Address() const {
1070 return static_cast<GPUVAddr>( 1096 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
1071 (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); 1097 address_low);
1072 } 1098 }
1073 } tic; 1099 } tic;
1074 1100
@@ -1397,12 +1423,6 @@ public:
1397 1423
1398 void FlushMMEInlineDraw(); 1424 void FlushMMEInlineDraw();
1399 1425
1400 /// Given a texture handle, returns the TSC and TIC entries.
1401 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
1402
1403 /// Returns the texture information for a specific texture in a specific shader stage.
1404 Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
1405
1406 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; 1426 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1407 1427
1408 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; 1428 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
1598ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); 1618ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
1599ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); 1619ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
1600ASSERT_REG_POSITION(patch_vertices, 0x373); 1620ASSERT_REG_POSITION(patch_vertices, 0x373);
1621ASSERT_REG_POSITION(fragment_barrier, 0x378);
1601ASSERT_REG_POSITION(scissor_test, 0x380); 1622ASSERT_REG_POSITION(scissor_test, 0x380);
1602ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); 1623ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
1603ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); 1624ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
1604ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); 1625ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
1626ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
1627ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
1605ASSERT_REG_POSITION(color_mask_common, 0x3E4); 1628ASSERT_REG_POSITION(color_mask_common, 0x3E4);
1606ASSERT_REG_POSITION(depth_bounds, 0x3E7); 1629ASSERT_REG_POSITION(depth_bounds, 0x3E7);
1607ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); 1630ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
1609ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); 1632ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
1610ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); 1633ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
1611ASSERT_REG_POSITION(zeta, 0x3F8); 1634ASSERT_REG_POSITION(zeta, 0x3F8);
1635ASSERT_REG_POSITION(render_area, 0x3FD);
1612ASSERT_REG_POSITION(clear_flags, 0x43E); 1636ASSERT_REG_POSITION(clear_flags, 0x43E);
1613ASSERT_REG_POSITION(fill_rectangle, 0x44F); 1637ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1614ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1638ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
1617ASSERT_REG_POSITION(rt_control, 0x487); 1641ASSERT_REG_POSITION(rt_control, 0x487);
1618ASSERT_REG_POSITION(zeta_width, 0x48a); 1642ASSERT_REG_POSITION(zeta_width, 0x48a);
1619ASSERT_REG_POSITION(zeta_height, 0x48b); 1643ASSERT_REG_POSITION(zeta_height, 0x48b);
1620ASSERT_REG_POSITION(zeta_layers, 0x48c); 1644ASSERT_REG_POSITION(zeta_depth, 0x48c);
1645ASSERT_REG_POSITION(sampler_index, 0x48D);
1621ASSERT_REG_POSITION(depth_test_enable, 0x4B3); 1646ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
1622ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 1647ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
1623ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); 1648ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
1641ASSERT_REG_POSITION(screen_y_control, 0x4EB); 1666ASSERT_REG_POSITION(screen_y_control, 0x4EB);
1642ASSERT_REG_POSITION(line_width_smooth, 0x4EC); 1667ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
1643ASSERT_REG_POSITION(line_width_aliased, 0x4ED); 1668ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
1669ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
1670ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
1644ASSERT_REG_POSITION(vb_element_base, 0x50D); 1671ASSERT_REG_POSITION(vb_element_base, 0x50D);
1645ASSERT_REG_POSITION(vb_base_instance, 0x50E); 1672ASSERT_REG_POSITION(vb_base_instance, 0x50E);
1646ASSERT_REG_POSITION(clip_distance_enabled, 0x544); 1673ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
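
The new registers above (fragment_barrier, render_area, sampler_index, the cache-invalidation words) are each pinned by an ASSERT_REG_POSITION entry. A minimal sketch of the offset-pinning pattern such a macro typically expands to, assuming the usual offsetof/static_assert form (the real macro body is defined elsewhere in the header):

    // Illustrative only: the actual macro lives in the engine header.
    #include <cstddef>
    #include <cstdint>

    struct Regs {
        uint32_t preceding[0x373]; // registers 0x000..0x372
        uint32_t patch_vertices;   // register 0x373
    };

    // Register indices are in 32-bit words, hence the * 4.
    static_assert(offsetof(Regs, patch_vertices) == 0x373 * 4,
                  "patch_vertices has invalid position");

Shrinking a padding span (for example 0xC down to 0x4 plus 0x7 around fragment_barrier) keeps every later offset stable, which is exactly what these assertions verify.
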
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 1c29e895e..ba750748c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
96} 96}
97 97
98void MaxwellDMA::CopyBlockLinearToPitch() { 98void MaxwellDMA::CopyBlockLinearToPitch() {
99 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
99 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); 100 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
100 UNIMPLEMENTED_IF(regs.src_params.layer != 0); 101 UNIMPLEMENTED_IF(regs.src_params.layer != 0);
101 102
@@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
135} 136}
136 137
137void MaxwellDMA::CopyPitchToBlockLinear() { 138void MaxwellDMA::CopyPitchToBlockLinear() {
139 UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
140
138 const auto& dst_params = regs.dst_params; 141 const auto& dst_params = regs.dst_params;
139 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; 142 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
140 const u32 width = dst_params.width; 143 const u32 width = dst_params.width;
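
Both new guards reject non-default block widths. Assuming the fields follow the usual Tegra block-linear convention (each block_size component stores a log2 count of GOBs, where a GOB is 64 bytes wide and 8 rows tall), a width of 0 means blocks one GOB wide, which is the only case the copy routines handle. A hedged sketch of that interpretation:

    // Assumption: block_size components are log2 values in GOB units.
    #include <cstdint>

    constexpr uint32_t GOB_SIZE_X = 64; // bytes per GOB row
    constexpr uint32_t GOB_SIZE_Y = 8;  // rows per GOB

    constexpr uint32_t BlockWidthBytes(uint32_t log2_width) {
        return GOB_SIZE_X << log2_width; // log2_width == 0 -> one GOB wide
    }
    constexpr uint32_t BlockHeightRows(uint32_t log2_height) {
        return GOB_SIZE_Y << log2_height; // e.g. 4 -> 128 rows
    }
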
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index c5f26896e..3512283ff 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -9,6 +9,7 @@
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/core.h" 11#include "core/core.h"
12#include "video_core/delayed_destruction_ring.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
@@ -47,6 +48,11 @@ protected:
47template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> 48template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
48class FenceManager { 49class FenceManager {
49public: 50public:
51 /// Notify the fence manager about a new frame
52 void TickFrame() {
53 delayed_destruction_ring.Tick();
54 }
55
50 void SignalSemaphore(GPUVAddr addr, u32 value) { 56 void SignalSemaphore(GPUVAddr addr, u32 value) {
51 TryReleasePendingFences(); 57 TryReleasePendingFences();
52 const bool should_flush = ShouldFlush(); 58 const bool should_flush = ShouldFlush();
@@ -86,7 +92,7 @@ public:
86 } else { 92 } else {
87 gpu.IncrementSyncPoint(current_fence->GetPayload()); 93 gpu.IncrementSyncPoint(current_fence->GetPayload());
88 } 94 }
89 fences.pop(); 95 PopFence();
90 } 96 }
91 } 97 }
92 98
@@ -132,7 +138,7 @@ private:
132 } else { 138 } else {
133 gpu.IncrementSyncPoint(current_fence->GetPayload()); 139 gpu.IncrementSyncPoint(current_fence->GetPayload());
134 } 140 }
135 fences.pop(); 141 PopFence();
136 } 142 }
137 } 143 }
138 144
@@ -158,7 +164,14 @@ private:
158 query_cache.CommitAsyncFlushes(); 164 query_cache.CommitAsyncFlushes();
159 } 165 }
160 166
167 void PopFence() {
168 delayed_destruction_ring.Push(std::move(fences.front()));
169 fences.pop();
170 }
171
161 std::queue<TFence> fences; 172 std::queue<TFence> fences;
173
174 DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
162}; 175};
163 176
164} // namespace VideoCommon 177} // namespace VideoCommon
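
PopFence() no longer destroys a fence immediately; it parks it in a six-slot ring that TickFrame() advances once per frame, so a popped fence survives several frames and the host GPU cannot observe a premature destruction. The delayed_destruction_ring.h header is not part of this excerpt; a minimal sketch of the interface the fence manager relies on, under that assumption:

    // Sketch only: Push() files an object under the current slot, Tick()
    // advances the slot and destroys whatever has survived a full lap.
    #include <array>
    #include <cstddef>
    #include <utility>
    #include <vector>

    template <typename T, size_t TICKS_TO_DESTROY>
    class DelayedDestructionRing {
    public:
        void Tick() {
            index = (index + 1) % TICKS_TO_DESTROY;
            elements[index].clear(); // destroys objects pushed a full lap ago
        }

        void Push(T&& object) {
            elements[index].push_back(std::move(object));
        }

    private:
        size_t index = 0;
        std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
    };

With TICKS_TO_DESTROY = 6, a fence pushed into the ring is reclaimed on the sixth TickFrame() after it enters.
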
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
new file mode 100644
index 000000000..b86c3a757
--- /dev/null
+++ b/src/video_core/framebuffer_config.h
@@ -0,0 +1,31 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Tegra {
8
9/**
10 * Struct describing framebuffer configuration
11 */
12struct FramebufferConfig {
13 enum class PixelFormat : u32 {
14 A8B8G8R8_UNORM = 1,
15 RGB565_UNORM = 4,
16 B8G8R8A8_UNORM = 5,
17 };
18
19 VAddr address{};
20 u32 offset{};
21 u32 width{};
22 u32 height{};
23 u32 stride{};
24 PixelFormat pixel_format{};
25
26 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
27 TransformFlags transform_flags{};
28 Common::Rectangle<int> crop_rect;
29};
30
31} // namespace Tegra
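
Note that, as shown, the new header names types it does not declare (VAddr, u32, Common::Rectangle, the NVFlinger BufferQueue), so it leans on its includers having pulled in the common type headers first. A hedged usage sketch, with placeholder values that are not taken from the diff:

    // Placeholder values; only the field names come from the header above.
    Tegra::FramebufferConfig config{};
    config.address = framebuffer_addr; // guest address, obtained elsewhere
    config.width = 1280;
    config.height = 720;
    config.stride = 1280;
    config.pixel_format = Tegra::FramebufferConfig::PixelFormat::A8B8G8R8_UNORM;
    gpu.SwapBuffers(&config); // presented via GPU::SwapBuffers from this change
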
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e2512a7f2..6ab06775f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core_timing.h" 10#include "core/core_timing.h"
11#include "core/core_timing_util.h" 11#include "core/core_timing_util.h"
12#include "core/frontend/emu_window.h" 12#include "core/frontend/emu_window.h"
13#include "core/hardware_interrupt_manager.h"
13#include "core/memory.h" 14#include "core/memory.h"
14#include "core/settings.h" 15#include "core/settings.h"
15#include "video_core/engines/fermi_2d.h" 16#include "video_core/engines/fermi_2d.h"
@@ -36,7 +37,8 @@ GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
36 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, 37 kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
37 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, 38 maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
38 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, 39 kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
39 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} 40 shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
41 gpu_thread{system_, is_async_} {}
40 42
41GPU::~GPU() = default; 43GPU::~GPU() = default;
42 44
@@ -198,10 +200,6 @@ void GPU::SyncGuestHost() {
198 renderer->Rasterizer().SyncGuestHost(); 200 renderer->Rasterizer().SyncGuestHost();
199} 201}
200 202
201void GPU::OnCommandListEnd() {
202 renderer->Rasterizer().ReleaseFences();
203}
204
205enum class GpuSemaphoreOperation { 203enum class GpuSemaphoreOperation {
206 AcquireEqual = 0x1, 204 AcquireEqual = 0x1,
207 WriteLong = 0x2, 205 WriteLong = 0x2,
@@ -461,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() {
461 } 459 }
462} 460}
463 461
462void GPU::Start() {
463 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
464 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
465 cpu_context->MakeCurrent();
466}
467
468void GPU::ObtainContext() {
469 cpu_context->MakeCurrent();
470}
471
472void GPU::ReleaseContext() {
473 cpu_context->DoneCurrent();
474}
475
476void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
477 gpu_thread.SubmitList(std::move(entries));
478}
479
480void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
481 if (!use_nvdec) {
482 return;
483 }
484 // This condition fires when a video stream ends; clear all intermediary data
485 if (entries[0].raw == 0xDEADB33F) {
486 cdma_pusher.reset();
487 return;
488 }
489 if (!cdma_pusher) {
490 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
491 }
492
493 // SubmitCommandBuffer would make the nvdec operations async, but this is not currently working
494 // TODO(ameerj): RE proper async nvdec operation
495 // gpu_thread.SubmitCommandBuffer(std::move(entries));
496
497 cdma_pusher->Push(std::move(entries));
498 cdma_pusher->DispatchCalls();
499}
500
501void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
502 gpu_thread.SwapBuffers(framebuffer);
503}
504
505void GPU::FlushRegion(VAddr addr, u64 size) {
506 gpu_thread.FlushRegion(addr, size);
507}
508
509void GPU::InvalidateRegion(VAddr addr, u64 size) {
510 gpu_thread.InvalidateRegion(addr, size);
511}
512
513void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
514 gpu_thread.FlushAndInvalidateRegion(addr, size);
515}
516
517void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
518 auto& interrupt_manager = system.InterruptManager();
519 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
520}
521
522void GPU::WaitIdle() const {
523 gpu_thread.WaitIdle();
524}
525
526void GPU::OnCommandListEnd() {
527 if (is_async) {
528 // This command only applies to asynchronous GPU mode
529 gpu_thread.OnCommandListEnd();
530 }
531}
532
464} // namespace Tegra 533} // namespace Tegra
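
PushCommandBuffer() treats a first entry whose raw value is 0xDEADB33F as an end-of-stream marker and tears the CDMA pusher down instead of dispatching. A hedged sketch of that path, assuming ChCommandHeaderList behaves like a std::vector of headers (which the entries[0].raw indexing suggests):

    // Sketch: submitting the end-of-stream sentinel seen in the diff.
    Tegra::ChCommandHeaderList entries(1);
    entries[0].raw = 0xDEADB33F; // sentinel checked at the top of PushCommandBuffer
    gpu.PushCommandBuffer(entries); // resets cdma_pusher, nothing is dispatched
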
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 660641d04..d81e38680 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,6 +15,8 @@
15#include "core/hle/service/nvflinger/buffer_queue.h" 15#include "core/hle/service/nvflinger/buffer_queue.h"
16#include "video_core/cdma_pusher.h" 16#include "video_core/cdma_pusher.h"
17#include "video_core/dma_pusher.h" 17#include "video_core/dma_pusher.h"
18#include "video_core/framebuffer_config.h"
19#include "video_core/gpu_thread.h"
18 20
19using CacheAddr = std::uintptr_t; 21using CacheAddr = std::uintptr_t;
20[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { 22[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
@@ -101,28 +103,6 @@ enum class DepthFormat : u32 {
101struct CommandListHeader; 103struct CommandListHeader;
102class DebugContext; 104class DebugContext;
103 105
104/**
105 * Struct describing framebuffer configuration
106 */
107struct FramebufferConfig {
108 enum class PixelFormat : u32 {
109 A8B8G8R8_UNORM = 1,
110 RGB565_UNORM = 4,
111 B8G8R8A8_UNORM = 5,
112 };
113
114 VAddr address;
115 u32 offset;
116 u32 width;
117 u32 height;
118 u32 stride;
119 PixelFormat pixel_format;
120
121 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
122 TransformFlags transform_flags;
123 Common::Rectangle<int> crop_rect;
124};
125
126namespace Engines { 106namespace Engines {
127class Fermi2D; 107class Fermi2D;
128class Maxwell3D; 108class Maxwell3D;
@@ -141,7 +121,7 @@ enum class EngineID {
141 121
142class MemoryManager; 122class MemoryManager;
143 123
144class GPU { 124class GPU final {
145public: 125public:
146 struct MethodCall { 126 struct MethodCall {
147 u32 method{}; 127 u32 method{};
@@ -159,7 +139,7 @@ public:
159 }; 139 };
160 140
161 explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); 141 explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_);
162 virtual ~GPU(); 142 ~GPU();
163 143
164 /// Binds a renderer to the GPU. 144 /// Binds a renderer to the GPU.
165 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); 145 void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
@@ -176,7 +156,7 @@ public:
176 /// Synchronizes CPU writes with Host GPU memory. 156 /// Synchronizes CPU writes with Host GPU memory.
177 void SyncGuestHost(); 157 void SyncGuestHost();
178 /// Signal the ending of command list. 158 /// Signal the ending of command list.
179 virtual void OnCommandListEnd(); 159 void OnCommandListEnd();
180 160
181 /// Request a host GPU memory flush from the CPU. 161 /// Request a host GPU memory flush from the CPU.
182 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); 162 [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@@ -240,7 +220,7 @@ public:
240 } 220 }
241 221
242 // Waits for the GPU to finish working 222 // Waits for the GPU to finish working
243 virtual void WaitIdle() const = 0; 223 void WaitIdle() const;
244 224
245 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. 225 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
246 void WaitFence(u32 syncpoint_id, u32 value); 226 void WaitFence(u32 syncpoint_id, u32 value);
@@ -330,34 +310,34 @@ public:
330 /// Performs any additional setup necessary in order to begin GPU emulation. 310 /// Performs any additional setup necessary in order to begin GPU emulation.
331 /// This can be used to launch any necessary threads and register any necessary 311 /// This can be used to launch any necessary threads and register any necessary
332 /// core timing events. 312 /// core timing events.
333 virtual void Start() = 0; 313 void Start();
334 314
335 /// Obtain the CPU Context 315 /// Obtain the CPU Context
336 virtual void ObtainContext() = 0; 316 void ObtainContext();
337 317
338 /// Release the CPU Context 318 /// Release the CPU Context
339 virtual void ReleaseContext() = 0; 319 void ReleaseContext();
340 320
341 /// Push GPU command entries to be processed 321 /// Push GPU command entries to be processed
342 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; 322 void PushGPUEntries(Tegra::CommandList&& entries);
343 323
344 /// Push GPU command buffer entries to be processed 324 /// Push GPU command buffer entries to be processed
345 virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0; 325 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries);
346 326
347 /// Swap buffers (render frame) 327 /// Swap buffers (render frame)
348 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; 328 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
349 329
350 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 330 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
351 virtual void FlushRegion(VAddr addr, u64 size) = 0; 331 void FlushRegion(VAddr addr, u64 size);
352 332
353 /// Notify rasterizer that any caches of the specified region should be invalidated 333 /// Notify rasterizer that any caches of the specified region should be invalidated
354 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 334 void InvalidateRegion(VAddr addr, u64 size);
355 335
356 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 336 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
357 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 337 void FlushAndInvalidateRegion(VAddr addr, u64 size);
358 338
359protected: 339protected:
360 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; 340 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const;
361 341
362private: 342private:
363 void ProcessBindMethod(const MethodCall& method_call); 343 void ProcessBindMethod(const MethodCall& method_call);
@@ -427,6 +407,9 @@ private:
427 std::mutex flush_request_mutex; 407 std::mutex flush_request_mutex;
428 408
429 const bool is_async; 409 const bool is_async;
410
411 VideoCommon::GPUThread::ThreadManager gpu_thread;
412 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
430}; 413};
431 414
432#define ASSERT_REG_POSITION(field_name, position) \ 415#define ASSERT_REG_POSITION(field_name, position) \
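
With the class devirtualized, callers no longer choose between GPUAsynch and GPUSynch; they construct the one GPU and let the is_async flag select the behavior inside ThreadManager. A hedged construction sketch (the settings accessor name is an assumption, the constructor signature is from the diff):

    // Assumed setting name; the GPU constructor arguments are per the diff.
    const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
    auto gpu = std::make_unique<Tegra::GPU>(system, is_async, use_nvdec);
    gpu->Start(); // starts the GPU thread and binds the shared CPU context
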
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
deleted file mode 100644
index 6cc091ecd..000000000
--- a/src/video_core/gpu_asynch.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/hardware_interrupt_manager.h"
7#include "video_core/gpu_asynch.h"
8#include "video_core/gpu_thread.h"
9#include "video_core/renderer_base.h"
10
11namespace VideoCommon {
12
13GPUAsynch::GPUAsynch(Core::System& system_, bool use_nvdec_)
14 : GPU{system_, true, use_nvdec_}, gpu_thread{system_} {}
15
16GPUAsynch::~GPUAsynch() = default;
17
18void GPUAsynch::Start() {
19 gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher);
20 cpu_context = renderer->GetRenderWindow().CreateSharedContext();
21 cpu_context->MakeCurrent();
22}
23
24void GPUAsynch::ObtainContext() {
25 cpu_context->MakeCurrent();
26}
27
28void GPUAsynch::ReleaseContext() {
29 cpu_context->DoneCurrent();
30}
31
32void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
33 gpu_thread.SubmitList(std::move(entries));
34}
35
36void GPUAsynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
37 if (!use_nvdec) {
38 return;
39 }
40 // This condition fires when a video stream ends, clear all intermediary data
41 if (entries[0].raw == 0xDEADB33F) {
42 cdma_pusher.reset();
43 return;
44 }
45 if (!cdma_pusher) {
46 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
47 }
48
49 // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
50 // TODO(ameerj): RE proper async nvdec operation
51 // gpu_thread.SubmitCommandBuffer(std::move(entries));
52
53 cdma_pusher->Push(std::move(entries));
54 cdma_pusher->DispatchCalls();
55}
56
57void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
58 gpu_thread.SwapBuffers(framebuffer);
59}
60
61void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
62 gpu_thread.FlushRegion(addr, size);
63}
64
65void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
66 gpu_thread.InvalidateRegion(addr, size);
67}
68
69void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
70 gpu_thread.FlushAndInvalidateRegion(addr, size);
71}
72
73void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
74 auto& interrupt_manager = system.InterruptManager();
75 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
76}
77
78void GPUAsynch::WaitIdle() const {
79 gpu_thread.WaitIdle();
80}
81
82void GPUAsynch::OnCommandListEnd() {
83 gpu_thread.OnCommandListEnd();
84}
85
86} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
deleted file mode 100644
index a384113f4..000000000
--- a/src/video_core/gpu_asynch.h
+++ /dev/null
@@ -1,47 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8#include "video_core/gpu_thread.h"
9
10namespace Core::Frontend {
11class GraphicsContext;
12}
13
14namespace VideoCore {
15class RendererBase;
16} // namespace VideoCore
17
18namespace VideoCommon {
19
20/// Implementation of GPU interface that runs the GPU asynchronously
21class GPUAsynch final : public Tegra::GPU {
22public:
23 explicit GPUAsynch(Core::System& system_, bool use_nvdec_);
24 ~GPUAsynch() override;
25
26 void Start() override;
27 void ObtainContext() override;
28 void ReleaseContext() override;
29 void PushGPUEntries(Tegra::CommandList&& entries) override;
30 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
31 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
32 void FlushRegion(VAddr addr, u64 size) override;
33 void InvalidateRegion(VAddr addr, u64 size) override;
34 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
35 void WaitIdle() const override;
36
37 void OnCommandListEnd() override;
38
39protected:
40 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
41
42private:
43 GPUThread::ThreadManager gpu_thread;
44 std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
45};
46
47} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
deleted file mode 100644
index 1e9d4b9b2..000000000
--- a/src/video_core/gpu_synch.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/gpu_synch.h"
6#include "video_core/renderer_base.h"
7
8namespace VideoCommon {
9
10GPUSynch::GPUSynch(Core::System& system_, bool use_nvdec_) : GPU{system_, false, use_nvdec_} {}
11
12GPUSynch::~GPUSynch() = default;
13
14void GPUSynch::Start() {}
15
16void GPUSynch::ObtainContext() {
17 renderer->Context().MakeCurrent();
18}
19
20void GPUSynch::ReleaseContext() {
21 renderer->Context().DoneCurrent();
22}
23
24void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
25 dma_pusher->Push(std::move(entries));
26 dma_pusher->DispatchCalls();
27}
28
29void GPUSynch::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
30 if (!use_nvdec) {
31 return;
32 }
33 // This condition fires when a video stream ends, clears all intermediary data
34 if (entries[0].raw == 0xDEADB33F) {
35 cdma_pusher.reset();
36 return;
37 }
38 if (!cdma_pusher) {
39 cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this);
40 }
41 cdma_pusher->Push(std::move(entries));
42 cdma_pusher->DispatchCalls();
43}
44
45void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
46 renderer->SwapBuffers(framebuffer);
47}
48
49void GPUSynch::FlushRegion(VAddr addr, u64 size) {
50 renderer->Rasterizer().FlushRegion(addr, size);
51}
52
53void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
54 renderer->Rasterizer().InvalidateRegion(addr, size);
55}
56
57void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
58 renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
59}
60
61} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
deleted file mode 100644
index c5904b8db..000000000
--- a/src/video_core/gpu_synch.h
+++ /dev/null
@@ -1,41 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/gpu.h"
8
9namespace Core::Frontend {
10class GraphicsContext;
11}
12
13namespace VideoCore {
14class RendererBase;
15} // namespace VideoCore
16
17namespace VideoCommon {
18
19/// Implementation of GPU interface that runs the GPU synchronously
20class GPUSynch final : public Tegra::GPU {
21public:
22 explicit GPUSynch(Core::System& system_, bool use_nvdec_);
23 ~GPUSynch() override;
24
25 void Start() override;
26 void ObtainContext() override;
27 void ReleaseContext() override;
28 void PushGPUEntries(Tegra::CommandList&& entries) override;
29 void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) override;
30 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
31 void FlushRegion(VAddr addr, u64 size) override;
32 void InvalidateRegion(VAddr addr, u64 size) override;
33 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
34 void WaitIdle() const override {}
35
36protected:
37 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
38 [[maybe_unused]] u32 value) const override {}
39};
40
41} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index e27218b96..7e490bcc3 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "common/scope_exit.h"
7#include "common/thread.h" 8#include "common/thread.h"
8#include "core/core.h" 9#include "core/core.h"
9#include "core/frontend/emu_window.h" 10#include "core/frontend/emu_window.h"
@@ -21,6 +22,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
21 SynchState& state, Tegra::CDmaPusher& cdma_pusher) { 22 SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
22 std::string name = "yuzu:GPU"; 23 std::string name = "yuzu:GPU";
23 MicroProfileOnThreadCreate(name.c_str()); 24 MicroProfileOnThreadCreate(name.c_str());
25 SCOPE_EXIT({ MicroProfileOnThreadExit(); });
26
24 Common::SetCurrentThreadName(name.c_str()); 27 Common::SetCurrentThreadName(name.c_str());
25 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 28 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
26 system.RegisterHostThread(); 29 system.RegisterHostThread();
@@ -65,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
65 } 68 }
66} 69}
67 70
68ThreadManager::ThreadManager(Core::System& system_) : system{system_} {} 71ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
72 : system{system_}, is_async{is_async_} {}
69 73
70ThreadManager::~ThreadManager() { 74ThreadManager::~ThreadManager() {
71 if (!thread.joinable()) { 75 if (!thread.joinable()) {
@@ -97,19 +101,30 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
97} 101}
98 102
99void ThreadManager::FlushRegion(VAddr addr, u64 size) { 103void ThreadManager::FlushRegion(VAddr addr, u64 size) {
100 if (!Settings::IsGPULevelHigh()) { 104 if (!is_async) {
105 // Always flush with synchronous GPU mode
101 PushCommand(FlushRegionCommand(addr, size)); 106 PushCommand(FlushRegionCommand(addr, size));
102 return; 107 return;
103 } 108 }
104 if (!Settings::IsGPULevelExtreme()) { 109
105 return; 110 // Asynchronous GPU mode
106 } 111 switch (Settings::values.gpu_accuracy.GetValue()) {
107 if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { 112 case Settings::GPUAccuracy::Normal:
113 PushCommand(FlushRegionCommand(addr, size));
114 break;
115 case Settings::GPUAccuracy::High:
116 // TODO(bunnei): Is this right? Preserving existing behavior for now
117 break;
118 case Settings::GPUAccuracy::Extreme: {
108 auto& gpu = system.GPU(); 119 auto& gpu = system.GPU();
109 u64 fence = gpu.RequestFlush(addr, size); 120 u64 fence = gpu.RequestFlush(addr, size);
110 PushCommand(GPUTickCommand()); 121 PushCommand(GPUTickCommand());
111 while (fence > gpu.CurrentFlushRequestFence()) { 122 while (fence > gpu.CurrentFlushRequestFence()) {
112 } 123 }
124 break;
125 }
126 default:
127 UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
113 } 128 }
114} 129}
115 130
@@ -123,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
123} 138}
124 139
125void ThreadManager::WaitIdle() const { 140void ThreadManager::WaitIdle() const {
126 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { 141 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
142 system.IsPoweredOn()) {
127 } 143 }
128} 144}
129 145
@@ -134,6 +150,12 @@ void ThreadManager::OnCommandListEnd() {
134u64 ThreadManager::PushCommand(CommandData&& command_data) { 150u64 ThreadManager::PushCommand(CommandData&& command_data) {
135 const u64 fence{++state.last_fence}; 151 const u64 fence{++state.last_fence};
136 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 152 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
153
154 if (!is_async) {
155 // In synchronous GPU mode, block the caller until the command has executed
156 WaitIdle();
157 }
158
137 return fence; 159 return fence;
138} 160}
139 161
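
Two details make the single ThreadManager cover both modes. In synchronous mode every PushCommand() now blocks in WaitIdle() until the GPU thread has drained the queue, reproducing the old GPUSynch semantics; the Extreme-accuracy flush additionally performs a fence handshake with the GPU thread. Condensed from the diff (bodies abridged, names unchanged):

    // Synchronous round trip: push, then spin until the fence is signaled.
    const u64 fence = ++state.last_fence;
    state.queue.Push(CommandDataContainer(std::move(command_data), fence));
    if (!is_async) {
        while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
               system.IsPoweredOn()) {
            // IsPoweredOn() breaks the spin on shutdown so the caller cannot hang
        }
    }
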
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index f1c52cd9e..2775629e7 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -10,8 +10,9 @@
10#include <optional> 10#include <optional>
11#include <thread> 11#include <thread>
12#include <variant> 12#include <variant>
13
13#include "common/threadsafe_queue.h" 14#include "common/threadsafe_queue.h"
14#include "video_core/gpu.h" 15#include "video_core/framebuffer_config.h"
15 16
16namespace Tegra { 17namespace Tegra {
17struct FramebufferConfig; 18struct FramebufferConfig;
@@ -25,6 +26,10 @@ class GraphicsContext;
25class System; 26class System;
26} // namespace Core 27} // namespace Core
27 28
29namespace VideoCore {
30class RendererBase;
31} // namespace VideoCore
32
28namespace VideoCommon::GPUThread { 33namespace VideoCommon::GPUThread {
29 34
30/// Command to signal to the GPU thread that processing has ended 35/// Command to signal to the GPU thread that processing has ended
@@ -112,7 +117,7 @@ struct SynchState final {
112/// Class used to manage the GPU thread 117/// Class used to manage the GPU thread
113class ThreadManager final { 118class ThreadManager final {
114public: 119public:
115 explicit ThreadManager(Core::System& system_); 120 explicit ThreadManager(Core::System& system_, bool is_async_);
116 ~ThreadManager(); 121 ~ThreadManager();
117 122
118 /// Creates and starts the GPU thread. 123 /// Creates and starts the GPU thread.
@@ -150,6 +155,7 @@ private:
150 Core::System& system; 155 Core::System& system;
151 std::thread thread; 156 std::thread thread;
152 std::thread::id thread_id; 157 std::thread::id thread_id;
158 const bool is_async;
153}; 159};
154 160
155} // namespace VideoCommon::GPUThread 161} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index c157724a9..4c7399d5a 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -1,8 +1,26 @@
1set(SHADER_SOURCES 1set(SHADER_FILES
2 block_linear_unswizzle_2d.comp
3 block_linear_unswizzle_3d.comp
4 convert_depth_to_float.frag
5 convert_float_to_depth.frag
6 full_screen_triangle.vert
7 opengl_copy_bc4.comp
2 opengl_present.frag 8 opengl_present.frag
3 opengl_present.vert 9 opengl_present.vert
10 pitch_unswizzle.comp
11 vulkan_blit_color_float.frag
12 vulkan_blit_depth_stencil.frag
13 vulkan_present.frag
14 vulkan_present.vert
15 vulkan_quad_array.comp
16 vulkan_quad_indexed.comp
17 vulkan_uint8.comp
4) 18)
5 19
20find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
21
22set(GLSL_FLAGS "")
23
6set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) 24set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
7set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) 25set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
8set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) 26set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
@@ -10,27 +28,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
10set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) 28set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
11set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) 29set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
12 30
13foreach(FILENAME IN ITEMS ${SHADER_SOURCES}) 31foreach(FILENAME IN ITEMS ${SHADER_FILES})
14 string(REPLACE "." "_" SHADER_NAME ${FILENAME}) 32 string(REPLACE "." "_" SHADER_NAME ${FILENAME})
15 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) 33 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
16 set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) 34 # Skip generating source headers for Vulkan exclusive files
17 add_custom_command( 35 if (NOT ${FILENAME} MATCHES "vulkan.*")
18 OUTPUT 36 set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
19 ${HEADER_FILE} 37 add_custom_command(
20 COMMAND 38 OUTPUT
21 ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} 39 ${SOURCE_HEADER_FILE}
22 MAIN_DEPENDENCY 40 COMMAND
23 ${SOURCE_FILE} 41 ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE}
24 DEPENDS 42 MAIN_DEPENDENCY
25 ${INPUT_FILE} 43 ${SOURCE_FILE}
26 # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified 44 DEPENDS
27 ) 45 ${INPUT_FILE}
28 set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) 46 # HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
47 )
48 set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
49 endif()
50 # Skip compiling OpenGL exclusive files to SPIR-V
51 if (NOT ${FILENAME} MATCHES "opengl.*")
52 string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME)
53 set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h)
54 add_custom_command(
55 OUTPUT
56 ${SPIRV_HEADER_FILE}
57 COMMAND
58 ${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
59 MAIN_DEPENDENCY
60 ${SOURCE_FILE}
61 )
62 set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE})
63 endif()
29endforeach() 64endforeach()
30 65
31add_custom_target(host_shaders 66add_custom_target(host_shaders
32 DEPENDS 67 DEPENDS
33 ${SHADER_HEADERS} 68 ${SHADER_HEADERS}
34 SOURCES 69 SOURCES
35 ${SHADER_SOURCES} 70 ${SHADER_FILES}
36) 71)
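
The --variable-name flag makes glslangValidator emit a C header holding the SPIR-V words in an array named after the shader (upper-cased, with an _SPV suffix, per the string(TOUPPER ...) line). A hedged consumption sketch; the include path and array name follow those rules, and the array is assumed to hold 32-bit words as glslang documents:

    // Assumed names, derived from the CMake rules above.
    #include "video_core/host_shaders/vulkan_present_frag_spv.h"

    VkShaderModuleCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    ci.codeSize = sizeof(VULKAN_PRESENT_FRAG_SPV); // byte size of the word array
    ci.pCode = VULKAN_PRESENT_FRAG_SPV;

This replaces the old workflow of hand-running glslangValidator and spirv-opt and pasting bytecode into C++ files, which the deleted build-instruction comments further below describe.
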
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_2d.comp b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
new file mode 100644
index 000000000..a131be79e
--- /dev/null
+++ b/src/video_core/host_shaders/block_linear_unswizzle_2d.comp
@@ -0,0 +1,122 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430
6
7#ifdef VULKAN
8
9#extension GL_EXT_shader_16bit_storage : require
10#extension GL_EXT_shader_8bit_storage : require
11#define HAS_EXTENDED_TYPES 1
12#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
13#define END_PUSH_CONSTANTS };
14#define UNIFORM(n)
15#define BINDING_SWIZZLE_BUFFER 0
16#define BINDING_INPUT_BUFFER 1
17#define BINDING_OUTPUT_IMAGE 2
18
19#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
20
21#extension GL_NV_gpu_shader5 : enable
22#ifdef GL_NV_gpu_shader5
23#define HAS_EXTENDED_TYPES 1
24#else
25#define HAS_EXTENDED_TYPES 0
26#endif
27#define BEGIN_PUSH_CONSTANTS
28#define END_PUSH_CONSTANTS
29#define UNIFORM(n) layout (location = n) uniform
30#define BINDING_SWIZZLE_BUFFER 0
31#define BINDING_INPUT_BUFFER 1
32#define BINDING_OUTPUT_IMAGE 0
33
34#endif
35
36BEGIN_PUSH_CONSTANTS
37UNIFORM(0) uvec3 origin;
38UNIFORM(1) ivec3 destination;
39UNIFORM(2) uint bytes_per_block_log2;
40UNIFORM(3) uint layer_stride;
41UNIFORM(4) uint block_size;
42UNIFORM(5) uint x_shift;
43UNIFORM(6) uint block_height;
44UNIFORM(7) uint block_height_mask;
45END_PUSH_CONSTANTS
46
47layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
48 uint swizzle_table[];
49};
50
51#if HAS_EXTENDED_TYPES
52layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
53layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
54#endif
55layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
56layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
57layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
58
59layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image;
60
61layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
62
63const uint GOB_SIZE_X = 64;
64const uint GOB_SIZE_Y = 8;
65const uint GOB_SIZE_Z = 1;
66const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
67
68const uint GOB_SIZE_X_SHIFT = 6;
69const uint GOB_SIZE_Y_SHIFT = 3;
70const uint GOB_SIZE_Z_SHIFT = 0;
71const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
72
73const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
74
75uint SwizzleOffset(uvec2 pos) {
76 pos = pos & SWIZZLE_MASK;
77 return swizzle_table[pos.y * 64 + pos.x];
78}
79
80uvec4 ReadTexel(uint offset) {
81 switch (bytes_per_block_log2) {
82#if HAS_EXTENDED_TYPES
83 case 0:
84 return uvec4(u8data[offset], 0, 0, 0);
85 case 1:
86 return uvec4(u16data[offset / 2], 0, 0, 0);
87#else
88 case 0:
89 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
90 case 1:
91 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
92#endif
93 case 2:
94 return uvec4(u32data[offset / 4], 0, 0, 0);
95 case 3:
96 return uvec4(u64data[offset / 8], 0, 0);
97 case 4:
98 return u128data[offset / 16];
99 }
100 return uvec4(0);
101}
102
103void main() {
104 uvec3 pos = gl_GlobalInvocationID + origin;
105 pos.x <<= bytes_per_block_log2;
106
107 // Read as soon as possible due to its latency
108 const uint swizzle = SwizzleOffset(pos.xy);
109
110 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
111
112 uint offset = 0;
113 offset += pos.z * layer_stride;
114 offset += (block_y >> block_height) * block_size;
115 offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
116 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
117 offset += swizzle;
118
119 const uvec4 texel = ReadTexel(offset);
120 const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
121 imageStore(output_image, coord, texel);
122}
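
The offset arithmetic in main() is the standard block-linear walk: pick the layer, then the block row, then the GOB row within the block, then the GOB column, then the swizzled byte within the GOB. A CPU-side transcription of the same math, useful as a reference (constants and names match the shader; GOB_SIZE_Z_SHIFT is zero and omitted):

    #include <cstdint>

    constexpr uint32_t GOB_SIZE_X_SHIFT = 6;
    constexpr uint32_t GOB_SIZE_Y_SHIFT = 3;
    constexpr uint32_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;

    // x_bytes is the texel x already scaled to bytes (pos.x <<= bytes_per_block_log2);
    // swizzle is the intra-GOB offset looked up from the swizzle table.
    uint32_t BlockLinearOffset2D(uint32_t x_bytes, uint32_t y, uint32_t z,
                                 uint32_t layer_stride, uint32_t block_size,
                                 uint32_t block_height, uint32_t block_height_mask,
                                 uint32_t x_shift, uint32_t swizzle) {
        const uint32_t block_y = y >> GOB_SIZE_Y_SHIFT;
        uint32_t offset = z * layer_stride;
        offset += (block_y >> block_height) * block_size;
        offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
        offset += (x_bytes >> GOB_SIZE_X_SHIFT) << x_shift;
        return offset + swizzle;
    }
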
diff --git a/src/video_core/host_shaders/block_linear_unswizzle_3d.comp b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
new file mode 100644
index 000000000..bb6872e6b
--- /dev/null
+++ b/src/video_core/host_shaders/block_linear_unswizzle_3d.comp
@@ -0,0 +1,125 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430
6
7#ifdef VULKAN
8
9#extension GL_EXT_shader_16bit_storage : require
10#extension GL_EXT_shader_8bit_storage : require
11#define HAS_EXTENDED_TYPES 1
12#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
13#define END_PUSH_CONSTANTS };
14#define UNIFORM(n)
15#define BINDING_SWIZZLE_BUFFER 0
16#define BINDING_INPUT_BUFFER 1
17#define BINDING_OUTPUT_IMAGE 2
18
19#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
20
21#extension GL_NV_gpu_shader5 : enable
22#ifdef GL_NV_gpu_shader5
23#define HAS_EXTENDED_TYPES 1
24#else
25#define HAS_EXTENDED_TYPES 0
26#endif
27#define BEGIN_PUSH_CONSTANTS
28#define END_PUSH_CONSTANTS
29#define UNIFORM(n) layout (location = n) uniform
30#define BINDING_SWIZZLE_BUFFER 0
31#define BINDING_INPUT_BUFFER 1
32#define BINDING_OUTPUT_IMAGE 0
33
34#endif
35
36BEGIN_PUSH_CONSTANTS
37UNIFORM(0) uvec3 origin;
38UNIFORM(1) ivec3 destination;
39UNIFORM(2) uint bytes_per_block_log2;
40UNIFORM(3) uint slice_size;
41UNIFORM(4) uint block_size;
42UNIFORM(5) uint x_shift;
43UNIFORM(6) uint block_height;
44UNIFORM(7) uint block_height_mask;
45UNIFORM(8) uint block_depth;
46UNIFORM(9) uint block_depth_mask;
47END_PUSH_CONSTANTS
48
49layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
50 uint swizzle_table[];
51};
52
53#if HAS_EXTENDED_TYPES
54layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
55layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
56#endif
57layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
58layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
59layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
60
61layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image;
62
63layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in;
64
65const uint GOB_SIZE_X = 64;
66const uint GOB_SIZE_Y = 8;
67const uint GOB_SIZE_Z = 1;
68const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
69
70const uint GOB_SIZE_X_SHIFT = 6;
71const uint GOB_SIZE_Y_SHIFT = 3;
72const uint GOB_SIZE_Z_SHIFT = 0;
73const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
74
75const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
76
77uint SwizzleOffset(uvec2 pos) {
78 pos = pos & SWIZZLE_MASK;
79 return swizzle_table[pos.y * 64 + pos.x];
80}
81
82uvec4 ReadTexel(uint offset) {
83 switch (bytes_per_block_log2) {
84#if HAS_EXTENDED_TYPES
85 case 0:
86 return uvec4(u8data[offset], 0, 0, 0);
87 case 1:
88 return uvec4(u16data[offset / 2], 0, 0, 0);
89#else
90 case 0:
91 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
92 case 1:
93 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
94#endif
95 case 2:
96 return uvec4(u32data[offset / 4], 0, 0, 0);
97 case 3:
98 return uvec4(u64data[offset / 8], 0, 0);
99 case 4:
100 return u128data[offset / 16];
101 }
102 return uvec4(0);
103}
104
105void main() {
106 uvec3 pos = gl_GlobalInvocationID + origin;
107 pos.x <<= bytes_per_block_log2;
108
109 // Read as soon as possible due to its latency
110 const uint swizzle = SwizzleOffset(pos.xy);
111
112 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
113
114 uint offset = 0;
115 offset += (pos.z >> block_depth) * slice_size;
116 offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height);
117 offset += (block_y >> block_height) * block_size;
118 offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
119 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
120 offset += swizzle;
121
122 const uvec4 texel = ReadTexel(offset);
123 const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
124 imageStore(output_image, coord, texel);
125}
diff --git a/src/video_core/host_shaders/convert_depth_to_float.frag b/src/video_core/host_shaders/convert_depth_to_float.frag
new file mode 100644
index 000000000..624c58509
--- /dev/null
+++ b/src/video_core/host_shaders/convert_depth_to_float.frag
@@ -0,0 +1,13 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7layout(binding = 0) uniform sampler2D depth_texture;
8layout(location = 0) out float output_color;
9
10void main() {
11 ivec2 coord = ivec2(gl_FragCoord.xy);
12 output_color = texelFetch(depth_texture, coord, 0).r;
13}
diff --git a/src/video_core/host_shaders/convert_float_to_depth.frag b/src/video_core/host_shaders/convert_float_to_depth.frag
new file mode 100644
index 000000000..d86c795f4
--- /dev/null
+++ b/src/video_core/host_shaders/convert_float_to_depth.frag
@@ -0,0 +1,13 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7layout(binding = 0) uniform sampler2D color_texture;
8
9void main() {
10 ivec2 coord = ivec2(gl_FragCoord.xy);
11 float color = texelFetch(color_texture, coord, 0).r;
12 gl_FragDepth = color;
13}
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
new file mode 100644
index 000000000..452ad6502
--- /dev/null
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -0,0 +1,29 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7#ifdef VULKAN
8#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
9#define END_PUSH_CONSTANTS };
10#define UNIFORM(n)
11#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
12#define BEGIN_PUSH_CONSTANTS
13#define END_PUSH_CONSTANTS
14#define UNIFORM(n) layout (location = n) uniform
15#endif
16
17BEGIN_PUSH_CONSTANTS
18UNIFORM(0) vec2 tex_scale;
19UNIFORM(1) vec2 tex_offset;
20END_PUSH_CONSTANTS
21
22layout(location = 0) out vec2 texcoord;
23
24void main() {
25 float x = float((gl_VertexIndex & 1) << 2);
26 float y = float((gl_VertexIndex & 2) << 1);
27 gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
28 texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
29}
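
The bit tricks generate the classic bufferless full-screen triangle. Worked out per vertex index:

    // index 0: x = (0&1)<<2 = 0, y = (0&2)<<1 = 0  ->  (-1, -1)
    // index 1: x = (1&1)<<2 = 4, y = (1&2)<<1 = 0  ->  ( 3, -1)
    // index 2: x = (2&1)<<2 = 0, y = (2&2)<<1 = 4  ->  (-1,  3)
    // One oversized triangle covers all of clip space, so no vertex buffer is
    // bound and texcoord interpolates over the visible region.
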
diff --git a/src/video_core/host_shaders/opengl_copy_bc4.comp b/src/video_core/host_shaders/opengl_copy_bc4.comp
new file mode 100644
index 000000000..7b8e20fbe
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_copy_bc4.comp
@@ -0,0 +1,70 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430 core
6#extension GL_ARB_gpu_shader_int64 : require
7
8layout (local_size_x = 4, local_size_y = 4) in;
9
10layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input;
11layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output;
12
13layout(location = 0) uniform uvec3 src_offset;
14layout(location = 1) uniform uvec3 dst_offset;
15
16// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
17uint DecompressBlock(uint64_t bits, uvec2 coord) {
18 const uint code_offset = 16 + 3 * (4 * coord.y + coord.x);
19 const uint code = uint(bits >> code_offset) & 7;
20 const uint red0 = uint(bits >> 0) & 0xff;
21 const uint red1 = uint(bits >> 8) & 0xff;
22 if (red0 > red1) {
23 switch (code) {
24 case 0:
25 return red0;
26 case 1:
27 return red1;
28 case 2:
29 return (6 * red0 + 1 * red1) / 7;
30 case 3:
31 return (5 * red0 + 2 * red1) / 7;
32 case 4:
33 return (4 * red0 + 3 * red1) / 7;
34 case 5:
35 return (3 * red0 + 4 * red1) / 7;
36 case 6:
37 return (2 * red0 + 5 * red1) / 7;
38 case 7:
39 return (1 * red0 + 6 * red1) / 7;
40 }
41 } else {
42 switch (code) {
43 case 0:
44 return red0;
45 case 1:
46 return red1;
47 case 2:
48 return (4 * red0 + 1 * red1) / 5;
49 case 3:
50 return (3 * red0 + 2 * red1) / 5;
51 case 4:
52 return (2 * red0 + 3 * red1) / 5;
53 case 5:
54 return (1 * red0 + 4 * red1) / 5;
55 case 6:
56 return 0;
57 case 7:
58 return 0xff;
59 }
60 }
61 return 0;
62}
63
64void main() {
65 uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg;
66 uint64_t bits = packUint2x32(packed_bits);
67 uint red = DecompressBlock(bits, gl_LocalInvocationID.xy);
68 uvec4 color = uvec4(red & 0xff, 0, 0, 0xff);
69 imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color);
70}
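
DecompressBlock follows the RGTC rules: two 8-bit endpoints, then sixteen 3-bit selectors; when red0 > red1 the six middle codes interpolate in sevenths, otherwise in fifths with codes 6 and 7 pinned to 0 and 255. The switch ladders collapse to closed forms, shown here as a C++ reference (equivalent to the shader, not taken from it verbatim):

    #include <cstdint>

    // Decode one texel of a 64-bit BC4 block at tile coordinate (tx, ty).
    uint32_t DecompressBC4Texel(uint64_t bits, uint32_t tx, uint32_t ty) {
        const uint32_t code = uint32_t(bits >> (16 + 3 * (4 * ty + tx))) & 7;
        const uint32_t red0 = uint32_t(bits >> 0) & 0xff;
        const uint32_t red1 = uint32_t(bits >> 8) & 0xff;
        if (code == 0) {
            return red0;
        }
        if (code == 1) {
            return red1;
        }
        if (red0 > red1) {
            return ((8 - code) * red0 + (code - 1) * red1) / 7; // codes 2..7
        }
        if (code < 6) {
            return ((6 - code) * red0 + (code - 1) * red1) / 5; // codes 2..5
        }
        return code == 6 ? 0 : 0xff;
    }
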
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
index 8a4cb024b..84b818227 100644
--- a/src/video_core/host_shaders/opengl_present.frag
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -1,3 +1,7 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
1#version 430 core 5#version 430 core
2 6
3layout (location = 0) in vec2 frag_tex_coord; 7layout (location = 0) in vec2 frag_tex_coord;
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
index 2235d31a4..c3b5adbba 100644
--- a/src/video_core/host_shaders/opengl_present.vert
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -1,3 +1,7 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
1#version 430 core 5#version 430 core
2 6
3out gl_PerVertex { 7out gl_PerVertex {
diff --git a/src/video_core/host_shaders/pitch_unswizzle.comp b/src/video_core/host_shaders/pitch_unswizzle.comp
new file mode 100644
index 000000000..cb48ec170
--- /dev/null
+++ b/src/video_core/host_shaders/pitch_unswizzle.comp
@@ -0,0 +1,86 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 430
6
7#ifdef VULKAN
8
9#extension GL_EXT_shader_16bit_storage : require
10#extension GL_EXT_shader_8bit_storage : require
11#define HAS_EXTENDED_TYPES 1
12#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
13#define END_PUSH_CONSTANTS };
14#define UNIFORM(n)
15#define BINDING_INPUT_BUFFER 0
16#define BINDING_OUTPUT_IMAGE 1
17
18#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
19
20#extension GL_NV_gpu_shader5 : enable
21#ifdef GL_NV_gpu_shader5
22#define HAS_EXTENDED_TYPES 1
23#else
24#define HAS_EXTENDED_TYPES 0
25#endif
26#define BEGIN_PUSH_CONSTANTS
27#define END_PUSH_CONSTANTS
28#define UNIFORM(n) layout (location = n) uniform
29#define BINDING_INPUT_BUFFER 0
30#define BINDING_OUTPUT_IMAGE 0
31
32#endif
33
34BEGIN_PUSH_CONSTANTS
35UNIFORM(0) uvec2 origin;
36UNIFORM(1) ivec2 destination;
37UNIFORM(2) uint bytes_per_block;
38UNIFORM(3) uint pitch;
39END_PUSH_CONSTANTS
40
41#if HAS_EXTENDED_TYPES
42layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; };
43layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; };
44#endif
45layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; };
46layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; };
47layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; };
48
49layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image;
50
51layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
52
53uvec4 ReadTexel(uint offset) {
54 switch (bytes_per_block) {
55#if HAS_EXTENDED_TYPES
56 case 1:
57 return uvec4(u8data[offset], 0, 0, 0);
58 case 2:
59 return uvec4(u16data[offset / 2], 0, 0, 0);
60#else
61 case 1:
62 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
63 case 2:
64 return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
65#endif
66 case 4:
67 return uvec4(u32data[offset / 4], 0, 0, 0);
68 case 8:
69 return uvec4(u64data[offset / 8], 0, 0);
70 case 16:
71 return u128data[offset / 16];
72 }
73 return uvec4(0);
74}
75
76void main() {
77 uvec2 pos = gl_GlobalInvocationID.xy + origin;
78
79 uint offset = 0;
80 offset += pos.x * bytes_per_block;
81 offset += pos.y * pitch;
82
83 const uvec4 texel = ReadTexel(offset);
84 const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination;
85 imageStore(output_image, coord, texel);
86}
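
Unlike the block-linear shaders, the pitch case is plain row-major addressing; in C++ terms the whole of main()'s offset computation is:

    // Equivalent of the shader's offset math for pitch-linear surfaces.
    constexpr uint32_t PitchOffset(uint32_t x, uint32_t y,
                                   uint32_t bytes_per_block, uint32_t pitch) {
        return y * pitch + x * bytes_per_block;
    }
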
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/vulkan_blit_color_float.frag
new file mode 100644
index 000000000..4a6aae410
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_blit_color_float.frag
@@ -0,0 +1,14 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6
7layout(binding = 0) uniform sampler2D tex;
8
9layout(location = 0) in vec2 texcoord;
10layout(location = 0) out vec4 color;
11
12void main() {
13 color = textureLod(tex, texcoord, 0);
14}
diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
new file mode 100644
index 000000000..19bb23a5a
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
@@ -0,0 +1,16 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#version 450
6#extension GL_ARB_shader_stencil_export : require
7
8layout(binding = 0) uniform sampler2D depth_tex;
9layout(binding = 1) uniform isampler2D stencil_tex;
10
11layout(location = 0) in vec2 texcoord;
12
13void main() {
14 gl_FragDepth = textureLod(depth_tex, texcoord, 0).r;
15 gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r;
16}
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/host_shaders/vulkan_present.frag
index a06ecd24a..0979ff3e6 100644
--- a/src/video_core/renderer_vulkan/shaders/blit.frag
+++ b/src/video_core/host_shaders/vulkan_present.frag
@@ -2,15 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core 5#version 460 core
15 6
16layout (location = 0) in vec2 frag_tex_coord; 7layout (location = 0) in vec2 frag_tex_coord;
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/host_shaders/vulkan_present.vert
index c64d9235a..00b868958 100644
--- a/src/video_core/renderer_vulkan/shaders/blit.vert
+++ b/src/video_core/host_shaders/vulkan_present.vert
@@ -2,15 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-/*
- * Build instructions:
- * $ glslangValidator -V $THIS_FILE -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
 #version 460 core
 
 layout (location = 0) in vec2 vert_position;
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp
index 5a5703308..212f4e998 100644
--- a/src/video_core/renderer_vulkan/shaders/quad_array.comp
+++ b/src/video_core/host_shaders/vulkan_quad_array.comp
@@ -2,15 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-/*
- * Build instructions:
- * $ glslangValidator -V $THIS_FILE -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
 #version 460 core
 
 layout (local_size_x = 1024) in;
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/host_shaders/vulkan_quad_indexed.comp
index 5a472ba9b..8655591d0 100644
--- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
+++ b/src/video_core/host_shaders/vulkan_quad_indexed.comp
@@ -2,15 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-/*
- * Build instructions:
- * $ glslangValidator -V quad_indexed.comp -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
 #version 460 core
 
 layout (local_size_x = 1024) in;
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp
index a320f3ae0..ad74d7af9 100644
--- a/src/video_core/renderer_vulkan/shaders/uint8.comp
+++ b/src/video_core/host_shaders/vulkan_uint8.comp
@@ -2,15 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-/*
- * Build instructions:
- * $ glslangValidator -V $THIS_FILE -o output.spv
- * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
- * $ xxd -i optimized.spv
- *
- * Then copy that bytecode to the C++ file
- */
-
 #version 460 core
 #extension GL_EXT_shader_16bit_storage : require
 #extension GL_EXT_shader_8bit_storage : require
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 6e70bd362..65feff588 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     }
 
     // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size);
+    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
+    ASSERT(cpu_addr);
+
+    rasterizer->UnmapMemory(*cpu_addr, size);
 
     UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
 }
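The old code dereferenced GpuToCpuAddress blindly; the new code names the optional, asserts it, and routes the range through the rasterizer's new UnmapMemory hook. A reduced sketch of that translate-then-check pattern (PageMapSketch is a hypothetical stand-in for the real page tables):

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <unordered_map>

    using GPUVAddr = std::uint64_t;
    using VAddr = std::uint64_t;

    // Translation can fail, so the result is an optional the caller must check,
    // exactly as Unmap now does.
    struct PageMapSketch {
        static constexpr std::uint64_t PAGE_BITS = 16;
        std::unordered_map<GPUVAddr, VAddr> pages; // GPU page base -> CPU page base

        std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) const {
            const auto it = pages.find(gpu_addr & ~((1ULL << PAGE_BITS) - 1));
            if (it == pages.end()) {
                return std::nullopt; // unmapped: no CPU backing to flush
            }
            return it->second + (gpu_addr & ((1ULL << PAGE_BITS) - 1));
        }
    };

    void Unmap(const PageMapSketch& map, GPUVAddr gpu_addr) {
        const std::optional<VAddr> cpu_addr = map.GpuToCpuAddress(gpu_addr);
        assert(cpu_addr); // mirrors the ASSERT added in the diff
        // rasterizer->UnmapMemory(*cpu_addr, size) would follow here
    }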
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 9da9fb4ff..e69de29bb 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -1,250 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/morton.h"
10#include "video_core/surface.h"
11#include "video_core/textures/decoders.h"
12
13namespace VideoCore {
14
15using Surface::GetBytesPerPixel;
16using Surface::PixelFormat;
17
18using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
19using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
20
21template <bool morton_to_linear, PixelFormat format>
22static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
23 u32 tile_width_spacing, u8* buffer, u8* addr) {
24 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
25
26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
27 // pixel values.
28 constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
29 constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
30
31 if constexpr (morton_to_linear) {
32 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
33 stride, height, depth, block_height, block_depth,
34 tile_width_spacing);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
39 block_height, block_depth, tile_width_spacing);
40 }
41}
42
43static constexpr ConversionArray morton_to_linear_fns = {
44 MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
45 MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
46 MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
47 MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
48 MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
49 MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
50 MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
51 MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
52 MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
53 MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
54 MortonCopy<true, PixelFormat::R8_UNORM>,
55 MortonCopy<true, PixelFormat::R8_SNORM>,
56 MortonCopy<true, PixelFormat::R8_SINT>,
57 MortonCopy<true, PixelFormat::R8_UINT>,
58 MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
59 MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
60 MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
61 MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
62 MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
63 MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
64 MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
65 MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
66 MortonCopy<true, PixelFormat::BC2_UNORM>,
67 MortonCopy<true, PixelFormat::BC3_UNORM>,
68 MortonCopy<true, PixelFormat::BC4_UNORM>,
69 MortonCopy<true, PixelFormat::BC4_SNORM>,
70 MortonCopy<true, PixelFormat::BC5_UNORM>,
71 MortonCopy<true, PixelFormat::BC5_SNORM>,
72 MortonCopy<true, PixelFormat::BC7_UNORM>,
73 MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
74 MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
75 MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
76 MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
77 MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
78 MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
79 MortonCopy<true, PixelFormat::R32G32_FLOAT>,
80 MortonCopy<true, PixelFormat::R32G32_SINT>,
81 MortonCopy<true, PixelFormat::R32_FLOAT>,
82 MortonCopy<true, PixelFormat::R16_FLOAT>,
83 MortonCopy<true, PixelFormat::R16_UNORM>,
84 MortonCopy<true, PixelFormat::R16_SNORM>,
85 MortonCopy<true, PixelFormat::R16_UINT>,
86 MortonCopy<true, PixelFormat::R16_SINT>,
87 MortonCopy<true, PixelFormat::R16G16_UNORM>,
88 MortonCopy<true, PixelFormat::R16G16_FLOAT>,
89 MortonCopy<true, PixelFormat::R16G16_UINT>,
90 MortonCopy<true, PixelFormat::R16G16_SINT>,
91 MortonCopy<true, PixelFormat::R16G16_SNORM>,
92 MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
93 MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
94 MortonCopy<true, PixelFormat::R8G8_UNORM>,
95 MortonCopy<true, PixelFormat::R8G8_SNORM>,
96 MortonCopy<true, PixelFormat::R8G8_SINT>,
97 MortonCopy<true, PixelFormat::R8G8_UINT>,
98 MortonCopy<true, PixelFormat::R32G32_UINT>,
99 MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
100 MortonCopy<true, PixelFormat::R32_UINT>,
101 MortonCopy<true, PixelFormat::R32_SINT>,
102 MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
103 MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
104 MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
105 MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
106 MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
107 MortonCopy<true, PixelFormat::BC2_SRGB>,
108 MortonCopy<true, PixelFormat::BC3_SRGB>,
109 MortonCopy<true, PixelFormat::BC7_SRGB>,
110 MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
111 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
112 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
113 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
114 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
115 MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
116 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
117 MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
118 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
119 MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
120 MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
121 MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
122 MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
123 MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
124 MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
125 MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
126 MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
127 MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
128 MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
129 MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
130 MortonCopy<true, PixelFormat::D32_FLOAT>,
131 MortonCopy<true, PixelFormat::D16_UNORM>,
132 MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
133 MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
134 MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
135};
136
137static constexpr ConversionArray linear_to_morton_fns = {
138 MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
139 MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
140 MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
141 MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
142 MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
143 MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
144 MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
145 MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
146 MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
147 MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
148 MortonCopy<false, PixelFormat::R8_UNORM>,
149 MortonCopy<false, PixelFormat::R8_SNORM>,
150 MortonCopy<false, PixelFormat::R8_SINT>,
151 MortonCopy<false, PixelFormat::R8_UINT>,
152 MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
153 MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
154 MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
155 MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
156 MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
157 MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
158 MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
159 MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
160 MortonCopy<false, PixelFormat::BC2_UNORM>,
161 MortonCopy<false, PixelFormat::BC3_UNORM>,
162 MortonCopy<false, PixelFormat::BC4_UNORM>,
163 MortonCopy<false, PixelFormat::BC4_SNORM>,
164 MortonCopy<false, PixelFormat::BC5_UNORM>,
165 MortonCopy<false, PixelFormat::BC5_SNORM>,
166 MortonCopy<false, PixelFormat::BC7_UNORM>,
167 MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
168 MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
169 // TODO(Subv): Swizzling ASTC formats are not supported
170 nullptr,
171 MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
172 MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
173 MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
174 MortonCopy<false, PixelFormat::R32G32_FLOAT>,
175 MortonCopy<false, PixelFormat::R32G32_SINT>,
176 MortonCopy<false, PixelFormat::R32_FLOAT>,
177 MortonCopy<false, PixelFormat::R16_FLOAT>,
178 MortonCopy<false, PixelFormat::R16_UNORM>,
179 MortonCopy<false, PixelFormat::R16_SNORM>,
180 MortonCopy<false, PixelFormat::R16_UINT>,
181 MortonCopy<false, PixelFormat::R16_SINT>,
182 MortonCopy<false, PixelFormat::R16G16_UNORM>,
183 MortonCopy<false, PixelFormat::R16G16_FLOAT>,
184 MortonCopy<false, PixelFormat::R16G16_UINT>,
185 MortonCopy<false, PixelFormat::R16G16_SINT>,
186 MortonCopy<false, PixelFormat::R16G16_SNORM>,
187 MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
188 MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
189 MortonCopy<false, PixelFormat::R8G8_UNORM>,
190 MortonCopy<false, PixelFormat::R8G8_SNORM>,
191 MortonCopy<false, PixelFormat::R8G8_SINT>,
192 MortonCopy<false, PixelFormat::R8G8_UINT>,
193 MortonCopy<false, PixelFormat::R32G32_UINT>,
194 MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
195 MortonCopy<false, PixelFormat::R32_UINT>,
196 MortonCopy<false, PixelFormat::R32_SINT>,
197 nullptr,
198 nullptr,
199 nullptr,
200 MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
201 MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
202 MortonCopy<false, PixelFormat::BC2_SRGB>,
203 MortonCopy<false, PixelFormat::BC3_SRGB>,
204 MortonCopy<false, PixelFormat::BC7_SRGB>,
205 MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
206 nullptr,
207 nullptr,
208 nullptr,
209 nullptr,
210 nullptr,
211 nullptr,
212 nullptr,
213 nullptr,
214 nullptr,
215 nullptr,
216 nullptr,
217 nullptr,
218 nullptr,
219 nullptr,
220 nullptr,
221 nullptr,
222 nullptr,
223 nullptr,
224 MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
225 MortonCopy<false, PixelFormat::D32_FLOAT>,
226 MortonCopy<false, PixelFormat::D16_UNORM>,
227 MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
228 MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
229 MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
230};
231
232static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
233 switch (mode) {
234 case MortonSwizzleMode::MortonToLinear:
235 return morton_to_linear_fns[static_cast<std::size_t>(format)];
236 case MortonSwizzleMode::LinearToMorton:
237 return linear_to_morton_fns[static_cast<std::size_t>(format)];
238 }
239 UNREACHABLE();
240 return morton_to_linear_fns[static_cast<std::size_t>(format)];
241}
242
243void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
244 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
245 u8* buffer, u8* addr) {
246 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
247 tile_width_spacing, buffer, addr);
248}
249
250} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index b714a7e3f..e69de29bb 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -1,18 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, u8* addr);
17
18} // namespace VideoCore
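morton.cpp and morton.h are deleted outright, presumably superseded by the rewritten texture cache paths elsewhere in this diff. The dispatch idiom they used is still worth recording: a template instantiated once per enum value and collected into a constexpr table indexed by that enum, with nullptr for unsupported entries (as in the ASTC slots of the deleted linear_to_morton_fns). A toy reconstruction (Format, Copy and COPY_TABLE are illustrative, not the real PixelFormat machinery):

    #include <array>
    #include <cstddef>
    #include <cstdio>

    enum class Format { A = 0, B, C, Count };

    using CopyFn = void (*)(int size);

    // One template, one instantiation per format value.
    template <Format format>
    void Copy(int size) {
        std::printf("copy format=%d size=%d\n", static_cast<int>(format), size);
    }

    constexpr std::array<CopyFn, static_cast<std::size_t>(Format::Count)> COPY_TABLE{
        Copy<Format::A>,
        Copy<Format::B>,
        Copy<Format::C>,
    };

    void Dispatch(Format format, int size) {
        COPY_TABLE[static_cast<std::size_t>(format)](size); // O(1) lookup, no switch
    }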
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 27ef4c69a..0cb0f387d 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -76,6 +76,9 @@ public:
     /// Sync memory between guest and host.
     virtual void SyncGuestHost() = 0;
 
+    /// Unmap memory range
+    virtual void UnmapMemory(VAddr addr, u64 size) = 0;
+
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
@@ -83,6 +86,12 @@ public:
     /// Notify the host renderer to wait for previous primitive and compute operations.
     virtual void WaitForIdle() = 0;
 
+    /// Notify the host renderer to wait for reads and writes to render targets and flush caches.
+    virtual void FragmentBarrier() = 0;
+
+    /// Notify the host renderer to make available previous render target writes.
+    virtual void TiledCacheBarrier() = 0;
+
     /// Notify the rasterizer to send all written commands to the host GPU.
     virtual void FlushCommands() = 0;
 
@@ -91,8 +100,7 @@ public:
 
     /// Attempt to use a faster method to perform a surface copy
     [[nodiscard]] virtual bool AccelerateSurfaceCopy(
-        const Tegra::Engines::Fermi2D::Regs::Surface& src,
-        const Tegra::Engines::Fermi2D::Regs::Surface& dst,
+        const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
         const Tegra::Engines::Fermi2D::Config& copy_config) {
         return false;
     }
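Three pure virtuals join the interface here (UnmapMemory, FragmentBarrier, TiledCacheBarrier), so every backend must now provide them. A toy sketch of the unmap contract under those names (the classes below are illustrative, not yuzu's backends):

    #include <cstdint>

    using VAddr = std::uint64_t;
    using u64 = std::uint64_t;

    class RasterizerInterfaceSketch {
    public:
        virtual ~RasterizerInterfaceSketch() = default;
        /// Unmap memory range
        virtual void UnmapMemory(VAddr addr, u64 size) = 0;
        virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
    };

    class NullRasterizer final : public RasterizerInterfaceSketch {
    public:
        void UnmapMemory(VAddr addr, u64 size) override {
            // A real backend evicts texture/buffer entries in [addr, addr + size)
            // without writing them back: the CPU backing no longer exists.
        }
        void FlushAndInvalidateRegion(VAddr addr, u64 size) override {}
    };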
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 60735d502..5772cad87 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
 
 OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
                                Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                               const Device& device_, std::size_t stream_size_)
-    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_,
-                         std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)},
-      device{device_} {
+                               const Device& device_, OGLStreamBuffer& stream_buffer_,
+                               StateTracker& state_tracker)
+    : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
     if (!device.HasFastBufferSubData()) {
         return;
     }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 95251e26b..17ee90316 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -22,6 +22,7 @@ namespace OpenGL {
 class Device;
 class OGLStreamBuffer;
 class RasterizerOpenGL;
+class StateTracker;
 
 class Buffer : public VideoCommon::BufferBlock {
 public:
@@ -52,9 +53,10 @@ private:
 using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
 class OGLBufferCache final : public GenericBufferCache {
 public:
-    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            const Device& device_, std::size_t stream_size_);
+    explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+                            const Device& device, OGLStreamBuffer& stream_buffer,
+                            StateTracker& state_tracker);
     ~OGLBufferCache();
 
     BufferInfo GetEmptyBuffer(std::size_t) override;
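The cache no longer builds its own stream buffer from a byte size; the caller constructs the OGLStreamBuffer and passes a reference, so its lifetime is managed in one place. A minimal sketch of that ownership inversion (placeholder types, illustrative only):

    // The point is the constructor signature change: the buffer is borrowed,
    // not owned, matching the OGLBufferCache diff above.
    struct StreamBuffer {};

    class BufferCacheSketch {
    public:
        explicit BufferCacheSketch(StreamBuffer& stream_buffer_) : stream_buffer{stream_buffer_} {}

    private:
        StreamBuffer& stream_buffer; // owned by the rasterizer, shared with us
    };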
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a94e4f72e..81b71edfb 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,9 +5,11 @@
 #include <algorithm>
 #include <array>
 #include <cstddef>
+#include <cstdlib>
 #include <cstring>
 #include <limits>
 #include <optional>
+#include <span>
 #include <vector>
 
 #include <glad/glad.h>
@@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
 
 constexpr u32 NumStages = 5;
 
-constexpr std::array LimitUBOs = {
+constexpr std::array LIMIT_UBOS = {
     GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
     GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
-    GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
-
-constexpr std::array LimitSSBOs = {
+    GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
+};
+constexpr std::array LIMIT_SSBOS = {
     GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
     GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
-    GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
-
-constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
-                                      GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
-                                      GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
-                                      GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
-                                      GL_MAX_TEXTURE_IMAGE_UNITS,
-                                      GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
-
-constexpr std::array LimitImages = {
+    GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
+};
+constexpr std::array LIMIT_SAMPLERS = {
+    GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
+    GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+    GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
+    GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+    GL_MAX_TEXTURE_IMAGE_UNITS,
+    GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
+};
+constexpr std::array LIMIT_IMAGES = {
     GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
     GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
-    GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
+    GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
+};
 
 template <typename T>
 T GetInteger(GLenum pname) {
@@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
     return extensions;
 }
 
-bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
-    return std::find(images.begin(), images.end(), extension) != images.end();
+bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
+    return std::ranges::find(extensions, extension) != extensions.end();
 }
 
 u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
 
 std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
     std::array<u32, Tegra::Engines::MaxShaderTypes> max;
-    std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
-                   [](GLenum pname) { return GetInteger<u32>(pname); });
+    std::ranges::transform(LIMIT_UBOS, max.begin(),
+                           [](GLenum pname) { return GetInteger<u32>(pname); });
     return max;
 }
 
@@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
     for (std::size_t i = 0; i < NumStages; ++i) {
         const std::size_t stage = stage_swizzle[i];
         bindings[stage] = {
-            Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
-            Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
-            Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
+            Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
+            Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
+            Extract(base_samplers, num_samplers, total_samplers / NumStages,
+                    LIMIT_SAMPLERS[stage])};
     }
 
     u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
 
     // Reserve at least 4 image bindings on the fragment stage.
     bindings[4].image =
-        Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
+        Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
 
     // This is guaranteed to be at least 1.
     const u32 total_extracted_images = num_images / (NumStages - 1);
@@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
             continue;
         }
         bindings[stage].image =
-            Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
+            Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
     }
 
     // Compute doesn't care about any of this.
@@ -188,6 +193,11 @@ bool IsASTCSupported() {
     return true;
 }
 
+[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
+    const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+    return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
+}
+
 } // Anonymous namespace
 
 Device::Device()
@@ -198,6 +208,7 @@ Device::Device()
 
     const bool is_nvidia = vendor == "NVIDIA Corporation";
     const bool is_amd = vendor == "ATI Technologies Inc.";
+    const bool is_intel = vendor == "Intel";
 
     bool disable_fast_buffer_sub_data = false;
     if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
@@ -206,9 +217,8 @@ Device::Device()
206 "Beta driver 443.24 is known to have issues. There might be performance issues."); 217 "Beta driver 443.24 is known to have issues. There might be performance issues.");
207 disable_fast_buffer_sub_data = true; 218 disable_fast_buffer_sub_data = true;
208 } 219 }
209 220 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
210 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 221 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
211 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
212 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 222 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
213 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 223 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
214 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); 224 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@@ -222,8 +232,10 @@ Device::Device()
     has_variable_aoffi = TestVariableAoffi();
     has_component_indexing_bug = is_amd;
     has_precise_bug = TestPreciseBug();
+    has_broken_texture_view_formats = is_amd || is_intel;
     has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
     has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
+    has_debugging_tool_attached = IsDebugToolAttached(extensions);
 
     // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
     // uniform buffers as "push constants"
@@ -238,6 +250,8 @@ Device::Device()
     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
     LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
     LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
+    LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
+             has_broken_texture_view_formats);
 
     if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
         LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
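IsDebugToolAttached combines two probes: Nsight announces itself through environment variables it injects into the process, while RenderDoc-class tools expose the GL_EXT_debug_tool extension string. The same logic outside yuzu, as a self-contained sketch:

    #include <algorithm>
    #include <cstdlib>
    #include <span>
    #include <string_view>

    // Mirrors the diff's IsDebugToolAttached: environment variables for Nsight,
    // an extension string for debugger-injected GL contexts.
    bool DebugToolAttached(std::span<const std::string_view> extensions) {
        const bool nsight = std::getenv("NVTX_INJECTION64_PATH") != nullptr ||
                            std::getenv("NSIGHT_LAUNCHED") != nullptr;
        return nsight ||
               std::ranges::find(extensions, std::string_view{"GL_EXT_debug_tool"}) !=
                   extensions.end();
    }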
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8a4b6b9fc..3e79d1e37 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -36,11 +36,11 @@ public:
         return GetBaseBindings(static_cast<std::size_t>(shader_type));
     }
 
-    std::size_t GetUniformBufferAlignment() const {
+    size_t GetUniformBufferAlignment() const {
         return uniform_buffer_alignment;
     }
 
-    std::size_t GetShaderStorageBufferAlignment() const {
+    size_t GetShaderStorageBufferAlignment() const {
         return shader_storage_alignment;
     }
 
@@ -96,6 +96,10 @@ public:
         return has_precise_bug;
     }
 
+    bool HasBrokenTextureViewFormats() const {
+        return has_broken_texture_view_formats;
+    }
+
     bool HasFastBufferSubData() const {
         return has_fast_buffer_sub_data;
     }
@@ -104,6 +108,10 @@ public:
         return has_nv_viewport_array2;
     }
 
+    bool HasDebuggingToolAttached() const {
+        return has_debugging_tool_attached;
+    }
+
     bool UseAssemblyShaders() const {
         return use_assembly_shaders;
     }
@@ -118,8 +126,8 @@ private:
 
     std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
     std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
-    std::size_t uniform_buffer_alignment{};
-    std::size_t shader_storage_alignment{};
+    size_t uniform_buffer_alignment{};
+    size_t shader_storage_alignment{};
     u32 max_vertex_attributes{};
     u32 max_varyings{};
     u32 max_compute_shared_memory_size{};
@@ -133,8 +141,10 @@ private:
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
     bool has_precise_bug{};
+    bool has_broken_texture_view_formats{};
     bool has_fast_buffer_sub_data{};
     bool has_nv_viewport_array2{};
+    bool has_debugging_tool_attached{};
     bool use_assembly_shaders{};
     bool use_asynchronous_shaders{};
 };
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 6040646cb..3e9c922f5 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -46,7 +46,7 @@ void GLInnerFence::Wait() {
 }
 
 FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
-                                       Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_,
+                                       Tegra::GPU& gpu_, TextureCache& texture_cache_,
                                        OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
 
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 39ca6125b..30dbee613 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -33,12 +33,12 @@ private:
 
 using Fence = std::shared_ptr<GLInnerFence>;
 using GenericFenceManager =
-    VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
+    VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
 
 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
     explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
-                                TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_,
+                                TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
                                 QueryCache& query_cache_);
 
 protected:
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
deleted file mode 100644
index b8a512cb6..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6#include <unordered_map>
7#include <utility>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
14
15namespace OpenGL {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using VideoCore::Surface::SurfaceType;
19
20FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
21
22FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
23
24GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
25 const auto [entry, is_cache_miss] = cache.try_emplace(key);
26 auto& framebuffer{entry->second};
27 if (is_cache_miss) {
28 framebuffer = CreateFramebuffer(key);
29 }
30 return framebuffer.handle;
31}
32
33OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
34 OGLFramebuffer framebuffer;
35 framebuffer.Create();
36
37 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
38 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
39
40 if (key.zeta) {
41 const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
42 const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
43 key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
44 }
45
46 std::size_t num_buffers = 0;
47 std::array<GLenum, Maxwell::NumRenderTargets> targets;
48
49 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
50 if (!key.colors[index]) {
51 targets[index] = GL_NONE;
52 continue;
53 }
54 const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
55 key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
56
57 const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
58 targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
59 num_buffers = index + 1;
60 }
61
62 if (num_buffers > 0) {
63 glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
64 } else {
65 glDrawBuffer(GL_NONE);
66 }
67
68 return framebuffer;
69}
70
71std::size_t FramebufferCacheKey::Hash() const noexcept {
72 std::size_t hash = std::hash<View>{}(zeta);
73 for (const auto& color : colors) {
74 hash ^= std::hash<View>{}(color);
75 }
76 hash ^= static_cast<std::size_t>(color_attachments) << 16;
77 return hash;
78}
79
80bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
81 return std::tie(colors, zeta, color_attachments) ==
82 std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
83}
84
85} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
deleted file mode 100644
index 8f698fee0..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <unordered_map>
10
11#include <glad/glad.h>
12
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_texture_cache.h"
17
18namespace OpenGL {
19
20constexpr std::size_t BitsPerAttachment = 4;
21
22struct FramebufferCacheKey {
23 View zeta;
24 std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
25 u32 color_attachments = 0;
26
27 std::size_t Hash() const noexcept;
28
29 bool operator==(const FramebufferCacheKey& rhs) const noexcept;
30
31 bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
32 return !operator==(rhs);
33 }
34
35 void SetAttachment(std::size_t index, u32 attachment) {
36 color_attachments |= attachment << (BitsPerAttachment * index);
37 }
38};
39
40} // namespace OpenGL
41
42namespace std {
43
44template <>
45struct hash<OpenGL::FramebufferCacheKey> {
46 std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
47 return k.Hash();
48 }
49};
50
51} // namespace std
52
53namespace OpenGL {
54
55class FramebufferCacheOpenGL {
56public:
57 FramebufferCacheOpenGL();
58 ~FramebufferCacheOpenGL();
59
60 GLuint GetFramebuffer(const FramebufferCacheKey& key);
61
62private:
63 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
64
65 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
66};
67
68} // namespace OpenGL
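With the framebuffer cache gone, one detail of the deleted key is worth recording: Hash() XOR-folds the std::hash of every color view, so any two keys whose attachments are merely permuted hash to the same bucket. Wherever such a key resurfaces, a boost-style combine mixes position into the seed and avoids that; a generic sketch (HashCombine is illustrative):

    #include <cstddef>
    #include <functional>

    // Order-sensitive combine, unlike the deleted XOR fold. The constant is the
    // 64-bit golden-ratio value commonly used for this purpose.
    template <typename T>
    void HashCombine(std::size_t& seed, const T& value) {
        seed ^= std::hash<T>{}(value) + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
    }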
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e58e84759..8aa63d329 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,12 +25,15 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/shader_cache.h"
+#include "video_core/texture_cache/texture_cache.h"
 
 namespace OpenGL {
 
@@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
 
 namespace {
 
-constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
-constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
+constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
+constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
     NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
-constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
+constexpr size_t TOTAL_CONST_BUFFER_BYTES =
     NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
 
-constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
+constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
+
+constexpr size_t MAX_TEXTURES = 192;
+constexpr size_t MAX_IMAGES = 48;
+
+struct TextureHandle {
+    constexpr TextureHandle(u32 data, bool via_header_index) {
+        const Tegra::Texture::TextureHandle handle{data};
+        image = handle.tic_id;
+        sampler = via_header_index ? image : handle.tsc_id.Value();
+    }
+
+    u32 image;
+    u32 sampler;
+};
 
 template <typename Engine, typename Entry>
-Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
-                                               ShaderType shader_type, std::size_t index = 0) {
+TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
+                             ShaderType shader_type, size_t index = 0) {
     if constexpr (std::is_same_v<Entry, SamplerEntry>) {
         if (entry.is_separated) {
             const u32 buffer_1 = entry.buffer;
@@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
             const u32 offset_2 = entry.secondary_offset;
             const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
             const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
-            return engine.GetTextureInfo(handle_1 | handle_2);
+            return TextureHandle(handle_1 | handle_2, via_header_index);
         }
     }
     if (entry.is_bindless) {
-        const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
-        return engine.GetTextureInfo(handle);
-    }
-
-    const auto& gpu_profile = engine.AccessGuestDriverProfile();
-    const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
-    if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
-        return engine.GetStageTexture(shader_type, offset);
-    } else {
-        return engine.GetTexture(offset);
+        const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
+        return TextureHandle(raw, via_header_index);
     }
+    const u32 buffer = engine.GetBoundBuffer();
+    const u64 offset = (entry.offset + index) * sizeof(u32);
+    return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
 }
 
 std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
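TextureHandle splits the raw 32-bit handle into a TIC (texture header) index and a TSC (sampler) index, and in the via_header_index mode reuses the TIC index as the sampler index. A standalone sketch of the unpacking, assuming the usual Maxwell packing of 20 TIC bits below 12 TSC bits (the authoritative field widths live in Tegra::Texture::TextureHandle):

    #include <cstdint>

    struct DecodedHandle {
        std::uint32_t image;   // TIC index: which texture header to use
        std::uint32_t sampler; // TSC index: which sampler state to use
    };

    // Assumed layout: tic_id in bits [0, 20), tsc_id in bits [20, 32).
    DecodedHandle Decode(std::uint32_t data, bool via_header_index) {
        const std::uint32_t tic_id = data & 0xfffffu;
        const std::uint32_t tsc_id = (data >> 20) & 0xfffu;
        return {tic_id, via_header_index ? tic_id : tsc_id};
    }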
@@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
     if (!entry.IsIndirect()) {
         return entry.GetSize();
     }
-
     if (buffer.size > Maxwell::MaxConstBufferSize) {
         LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
                     Maxwell::MaxConstBufferSize);
@@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss
                     reinterpret_cast<const GLuint*>(ssbos));
 }
 
+ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
+    if (entry.is_buffer) {
+        return ImageViewType::Buffer;
+    }
+    switch (entry.type) {
+    case Tegra::Shader::TextureType::Texture1D:
+        return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
+    case Tegra::Shader::TextureType::Texture2D:
+        return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
+    case Tegra::Shader::TextureType::Texture3D:
+        return ImageViewType::e3D;
+    case Tegra::Shader::TextureType::TextureCube:
+        return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
+    }
+    UNREACHABLE();
+    return ImageViewType::e2D;
+}
+
+ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
+    switch (entry.type) {
+    case Tegra::Shader::ImageType::Texture1D:
+        return ImageViewType::e1D;
+    case Tegra::Shader::ImageType::Texture1DArray:
+        return ImageViewType::e1DArray;
+    case Tegra::Shader::ImageType::Texture2D:
+        return ImageViewType::e2D;
+    case Tegra::Shader::ImageType::Texture2DArray:
+        return ImageViewType::e2DArray;
+    case Tegra::Shader::ImageType::Texture3D:
+        return ImageViewType::e3D;
+    case Tegra::Shader::ImageType::TextureBuffer:
+        return ImageViewType::Buffer;
+    }
+    UNREACHABLE();
+    return ImageViewType::e2D;
+}
+
 } // Anonymous namespace
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                    Core::Memory::Memory& cpu_memory_, const Device& device_,
                                    ScreenInfo& screen_info_, ProgramManager& program_manager_,
                                    StateTracker& state_tracker_)
-    : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+    : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
       kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
       screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
-      texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
+      stream_buffer(device, state_tracker),
+      texture_cache_runtime(device, program_manager, state_tracker),
+      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
       query_cache(*this, maxwell3d, gpu_memory),
-      buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE),
+      buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
       async_shaders(emu_window_) {
-    CheckExtensions();
-
     unified_uniform_buffer.Create();
     glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
 
@@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
                                 nullptr, 0);
         }
     }
-
     if (device.UseAsynchronousShaders()) {
         async_shaders.AllocateWorkers();
     }
@@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
     }
 }
 
-void RasterizerOpenGL::CheckExtensions() {
-    if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
-        LOG_WARNING(
-            Render_OpenGL,
-            "Anisotropic filter is not supported! This can cause graphical issues in some games.");
-    }
-}
-
 void RasterizerOpenGL::SetupVertexFormat() {
     auto& flags = maxwell3d.dirty.flags;
     if (!flags[Dirty::VertexFormats]) {
@@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
     return info.offset;
 }
 
-void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
+void RasterizerOpenGL::SetupShaders() {
     MICROPROFILE_SCOPE(OpenGL_Shader);
     u32 clip_distances = 0;
 
+    std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
+    image_view_indices.clear();
+    sampler_handles.clear();
+
+    texture_cache.SynchronizeGraphicsDescriptors();
+
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = maxwell3d.regs.shader_config[index];
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             }
             continue;
         }
-
         // Currently these stages are not supported in the OpenGL backend.
         // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
         if (program == Maxwell::ShaderProgram::TesselationControl ||
@@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         }
 
         Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
-
         const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
         switch (program) {
         case Maxwell::ShaderProgram::VertexA:
@@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         default:
             UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
                               shader_config.enable.Value(), shader_config.offset);
+            break;
         }
 
         // Stage indices are 0 - 5
-        const std::size_t stage = index == 0 ? 0 : index - 1;
+        const size_t stage = index == 0 ? 0 : index - 1;
+        shaders[stage] = shader;
+
         SetupDrawConstBuffers(stage, shader);
         SetupDrawGlobalMemory(stage, shader);
-        SetupDrawTextures(stage, shader);
-        SetupDrawImages(stage, shader);
+        SetupDrawTextures(shader, stage);
+        SetupDrawImages(shader, stage);
 
         // Workaround for Intel drivers.
         // When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             ++index;
         }
     }
-
     SyncClipEnabled(clip_distances);
     maxwell3d.dirty.flags[Dirty::Shaders] = false;
+
+    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+    size_t image_view_index = 0;
+    size_t texture_index = 0;
+    size_t image_index = 0;
+    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
+        const Shader* const shader = shaders[stage];
+        if (shader) {
+            const auto base = device.GetBaseBindings(stage);
+            BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
+                         texture_index, image_index);
+        }
+    }
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
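SetupShaders now works in two phases: the per-stage loop only records the image-view indices each shader wants, FillGraphicsImageViews resolves the whole batch at once, and a second loop hands every stage its contiguous slice through BindTextures. A toy of that gather-then-bind shape (StageSketch and GatherThenBind are illustrative only):

    #include <cstddef>
    #include <vector>

    struct StageSketch {
        std::vector<int> wanted; // descriptor indices this stage's shader needs
    };

    // Phase 1 gathers every index, the cache resolves them in one batch, and
    // phase 2 hands each stage its contiguous slice of the results.
    void GatherThenBind(const std::vector<StageSketch>& stages) {
        std::vector<int> all_indices;
        for (const StageSketch& stage : stages) {
            all_indices.insert(all_indices.end(), stage.wanted.begin(), stage.wanted.end());
        }
        // ... resolve all_indices into views with a single cache call, then:
        std::size_t cursor = 0;
        for (const StageSketch& stage : stages) {
            // bind views [cursor, cursor + stage.wanted.size()) to this stage
            cursor += stage.wanted.size();
        }
    }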
@@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
     shader_cache.LoadDiskCache(title_id, stop_loading, callback);
 }
 
-void RasterizerOpenGL::ConfigureFramebuffers() {
-    MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
-        return;
-    }
-    maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
-
-    texture_cache.GuardRenderTargets(true);
-
-    View depth_surface = texture_cache.GetDepthBufferSurface(true);
-
-    const auto& regs = maxwell3d.regs;
-    UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
-
-    // Bind the framebuffer surfaces
-    FramebufferCacheKey key;
-    const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
-    for (std::size_t index = 0; index < colors_count; ++index) {
-        View color_surface{texture_cache.GetColorBufferSurface(index, true)};
-        if (!color_surface) {
-            continue;
-        }
-        // Assume that a surface will be written to if it is used as a framebuffer, even
-        // if the shader doesn't actually write to it.
-        texture_cache.MarkColorBufferInUse(index);
-
-        key.SetAttachment(index, regs.rt_control.GetMap(index));
-        key.colors[index] = std::move(color_surface);
-    }
-
-    if (depth_surface) {
-        // Assume that a surface will be written to if it is used as a framebuffer, even if
-        // the shader doesn't actually write to it.
-        texture_cache.MarkDepthBufferInUse();
-        key.zeta = std::move(depth_surface);
-    }
-
-    texture_cache.GuardRenderTargets(false);
-
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
-}
-
-void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
-    const auto& regs = maxwell3d.regs;
-
-    texture_cache.GuardRenderTargets(true);
-    View color_surface;
-
-    if (using_color) {
-        // Determine if we have to preserve the contents.
-        // First we have to make sure all clear masks are enabled.
-        bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
-                                 !regs.clear_buffers.B || !regs.clear_buffers.A;
-        const std::size_t index = regs.clear_buffers.RT;
-        if (regs.clear_flags.scissor) {
-            // Then we have to confirm scissor testing clears the whole image.
-            const auto& scissor = regs.scissor_test[0];
-            preserve_contents |= scissor.min_x > 0;
-            preserve_contents |= scissor.min_y > 0;
-            preserve_contents |= scissor.max_x < regs.rt[index].width;
-            preserve_contents |= scissor.max_y < regs.rt[index].height;
-        }
-
-        color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
-        texture_cache.MarkColorBufferInUse(index);
-    }
-
-    View depth_surface;
-    if (using_depth_stencil) {
-        bool preserve_contents = false;
-        if (regs.clear_flags.scissor) {
-            // For depth stencil clears we only have to confirm scissor test covers the whole image.
-            const auto& scissor = regs.scissor_test[0];
-            preserve_contents |= scissor.min_x > 0;
-            preserve_contents |= scissor.min_y > 0;
-            preserve_contents |= scissor.max_x < regs.zeta_width;
-            preserve_contents |= scissor.max_y < regs.zeta_height;
-        }
-
-        depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
-        texture_cache.MarkDepthBufferInUse();
-    }
-    texture_cache.GuardRenderTargets(false);
-
-    FramebufferCacheKey key;
-    key.colors[0] = std::move(color_surface);
-    key.zeta = std::move(depth_surface);
-
-    state_tracker.NotifyFramebuffer();
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
-}
-
 void RasterizerOpenGL::Clear() {
     if (!maxwell3d.ShouldExecute()) {
         return;
@@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {
523 regs.clear_buffers.A) { 491 regs.clear_buffers.A) {
524 use_color = true; 492 use_color = true;
525 493
526 state_tracker.NotifyColorMask0(); 494 const GLuint index = regs.clear_buffers.RT;
527 glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, 495 state_tracker.NotifyColorMask(index);
496 glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
528 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); 497 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
529 498
530 // TODO(Rodrigo): Determine if clamping is used on clears 499 // TODO(Rodrigo): Determine if clamping is used on clears
@@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {
557 state_tracker.NotifyScissor0(); 526 state_tracker.NotifyScissor0();
558 glDisablei(GL_SCISSOR_TEST, 0); 527 glDisablei(GL_SCISSOR_TEST, 0);
559 } 528 }
560
561 UNIMPLEMENTED_IF(regs.clear_flags.viewport); 529 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
562 530
563 ConfigureClearFramebuffer(use_color, use_depth || use_stencil); 531 {
532 auto lock = texture_cache.AcquireLock();
533 texture_cache.UpdateRenderTargets(true);
534 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
535 }
564 536
565 if (use_color) { 537 if (use_color) {
566 glClearBufferfv(GL_COLOR, 0, regs.clear_color); 538 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
567 } 539 }
568
569 if (use_depth && use_stencil) { 540 if (use_depth && use_stencil) {
570 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); 541 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
571 } else if (use_depth) { 542 } else if (use_depth) {
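A behavioral fix worth noting above: the old path masked and cleared attachment 0 unconditionally, while the new path honors the guest-selected target in regs.clear_buffers.RT for both the color mask and the clear itself. A compile-ready fragment of that per-attachment pattern (glad entry points, a live GL context, and a bound draw framebuffer assumed):

    #include <glad/glad.h>

    // Mask and clear one specific color attachment instead of hardcoding 0.
    void ClearColorTarget(GLuint rt, bool r, bool g, bool b, bool a,
                          const GLfloat color[4]) {
        glColorMaski(rt, r, g, b, a);
        glClearBufferfv(GL_COLOR, static_cast<GLint>(rt), color);
    }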
@@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
622 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 593 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
623 594
624 // Prepare the vertex array. 595 // Prepare the vertex array.
625 const bool invalidated = buffer_cache.Map(buffer_size); 596 buffer_cache.Map(buffer_size);
626
627 if (invalidated) {
628 // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
629 auto& dirty = maxwell3d.dirty.flags;
630 dirty[Dirty::VertexBuffers] = true;
631 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
632 dirty[index] = true;
633 }
634 }
635 597
636 // Prepare vertex array format. 598 // Prepare vertex array format.
637 SetupVertexFormat(); 599 SetupVertexFormat();
@@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
655 } 617 }
656 618
657 // Setup shaders and their used resources. 619 // Setup shaders and their used resources.
658 texture_cache.GuardSamplers(true); 620 auto lock = texture_cache.AcquireLock();
659 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); 621 SetupShaders();
660 SetupShaders(primitive_mode);
661 texture_cache.GuardSamplers(false);
662
663 ConfigureFramebuffers();
664 622
665 // Signal the buffer cache that we are not going to upload more things. 623 // Signal the buffer cache that we are not going to upload more things.
666 buffer_cache.Unmap(); 624 buffer_cache.Unmap();
667 625 texture_cache.UpdateRenderTargets(false);
626 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
668 program_manager.BindGraphicsPipeline(); 627 program_manager.BindGraphicsPipeline();
669 628
670 if (texture_cache.TextureBarrier()) { 629 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
671 glTextureBarrier();
672 }
673
674 BeginTransformFeedback(primitive_mode); 630 BeginTransformFeedback(primitive_mode);
675 631
676 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); 632 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
@@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
722 buffer_cache.Acquire(); 678 buffer_cache.Acquire();
723 current_cbuf = 0; 679 current_cbuf = 0;
724 680
725 auto kernel = shader_cache.GetComputeKernel(code_addr); 681 Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
726 program_manager.BindCompute(kernel->GetHandle());
727 682
728 SetupComputeTextures(kernel); 683 auto lock = texture_cache.AcquireLock();
729 SetupComputeImages(kernel); 684 BindComputeTextures(kernel);
730 685
731 const std::size_t buffer_size = 686 const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
732 Tegra::Engines::KeplerCompute::NumConstBuffers * 687 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
733 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
734 buffer_cache.Map(buffer_size); 688 buffer_cache.Map(buffer_size);
735 689
736 SetupComputeConstBuffers(kernel); 690 SetupComputeConstBuffers(kernel);
@@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
739 buffer_cache.Unmap(); 693 buffer_cache.Unmap();
740 694
741 const auto& launch_desc = kepler_compute.launch_description; 695 const auto& launch_desc = kepler_compute.launch_description;
742 program_manager.BindCompute(kernel->GetHandle());
743 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 696 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
744 ++num_queued_commands; 697 ++num_queued_commands;
745} 698}
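DispatchCompute's ordering changed as well: the kernel's program and textures are now bound up front through BindComputeTextures (which also issues the program bind), constant buffers are staged next, and the dispatch only goes out after Unmap. A fragment of the new shape, names taken from the diff, with the global-memory step elided; this assumes the rasterizer's members are in scope:

    Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
    auto lock = texture_cache.AcquireLock(); // held through the dispatch
    BindComputeTextures(kernel);             // binds program + texture state
    buffer_cache.Map(buffer_size);
    SetupComputeConstBuffers(kernel);
    buffer_cache.Unmap();                    // no more uploads this dispatch
    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
                      launch_desc.grid_dim_z);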
@@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
760 if (addr == 0 || size == 0) { 713 if (addr == 0 || size == 0) {
761 return; 714 return;
762 } 715 }
763 texture_cache.FlushRegion(addr, size); 716 {
717 auto lock = texture_cache.AcquireLock();
718 texture_cache.DownloadMemory(addr, size);
719 }
764 buffer_cache.FlushRegion(addr, size); 720 buffer_cache.FlushRegion(addr, size);
765 query_cache.FlushRegion(addr, size); 721 query_cache.FlushRegion(addr, size);
766} 722}
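The same locking discipline repeats across FlushRegion, InvalidateRegion, and OnCPUWrite below: AcquireLock() lives in a tight brace scope, so the texture-cache critical section ends before buffer and query cache work starts. A self-contained sketch of the shape, with std::mutex standing in for whatever synchronization AcquireLock() actually returns:

    #include <cstdint>
    #include <mutex>

    struct TextureCacheSketch {
        std::mutex mutex;
        std::unique_lock<std::mutex> AcquireLock() {
            return std::unique_lock{mutex};
        }
        void DownloadMemory(std::uint64_t, std::uint64_t) {}
    };

    void FlushRegionSketch(TextureCacheSketch& texture_cache,
                           std::uint64_t addr, std::uint64_t size) {
        {
            auto lock = texture_cache.AcquireLock(); // released at the brace
            texture_cache.DownloadMemory(addr, size);
        }
        // buffer_cache / query_cache flushes then run without holding the
        // texture-cache lock.
    }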
@@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
769 if (!Settings::IsGPULevelHigh()) { 725 if (!Settings::IsGPULevelHigh()) {
770 return buffer_cache.MustFlushRegion(addr, size); 726 return buffer_cache.MustFlushRegion(addr, size);
771 } 727 }
772 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); 728 return texture_cache.IsRegionGpuModified(addr, size) ||
729 buffer_cache.MustFlushRegion(addr, size);
773} 730}
774 731
775void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 732void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
777 if (addr == 0 || size == 0) { 734 if (addr == 0 || size == 0) {
778 return; 735 return;
779 } 736 }
780 texture_cache.InvalidateRegion(addr, size); 737 {
738 auto lock = texture_cache.AcquireLock();
739 texture_cache.WriteMemory(addr, size);
740 }
781 shader_cache.InvalidateRegion(addr, size); 741 shader_cache.InvalidateRegion(addr, size);
782 buffer_cache.InvalidateRegion(addr, size); 742 buffer_cache.InvalidateRegion(addr, size);
783 query_cache.InvalidateRegion(addr, size); 743 query_cache.InvalidateRegion(addr, size);
@@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
788 if (addr == 0 || size == 0) { 748 if (addr == 0 || size == 0) {
789 return; 749 return;
790 } 750 }
791 texture_cache.OnCPUWrite(addr, size); 751 {
752 auto lock = texture_cache.AcquireLock();
753 texture_cache.WriteMemory(addr, size);
754 }
792 shader_cache.OnCPUWrite(addr, size); 755 shader_cache.OnCPUWrite(addr, size);
793 buffer_cache.OnCPUWrite(addr, size); 756 buffer_cache.OnCPUWrite(addr, size);
794} 757}
795 758
796void RasterizerOpenGL::SyncGuestHost() { 759void RasterizerOpenGL::SyncGuestHost() {
797 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 760 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
798 texture_cache.SyncGuestHost();
799 buffer_cache.SyncGuestHost(); 761 buffer_cache.SyncGuestHost();
800 shader_cache.SyncGuestHost(); 762 shader_cache.SyncGuestHost();
801} 763}
802 764
765void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
766 {
767 auto lock = texture_cache.AcquireLock();
768 texture_cache.UnmapMemory(addr, size);
769 }
770 buffer_cache.OnCPUWrite(addr, size);
771 shader_cache.OnCPUWrite(addr, size);
772}
773
803void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { 774void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
804 if (!gpu.IsAsync()) { 775 if (!gpu.IsAsync()) {
805 gpu_memory.Write<u32>(addr, value); 776 gpu_memory.Write<u32>(addr, value);
@@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
841 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); 812 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
842} 813}
843 814
815void RasterizerOpenGL::FragmentBarrier() {
816 glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
817}
818
819void RasterizerOpenGL::TiledCacheBarrier() {
820 glTextureBarrier();
821}
822
844void RasterizerOpenGL::FlushCommands() { 823void RasterizerOpenGL::FlushCommands() {
845 // Only flush when we have commands queued to OpenGL. 824 // Only flush when we have commands queued to OpenGL.
846 if (num_queued_commands == 0) { 825 if (num_queued_commands == 0) {
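The two new barriers map distinct Maxwell hazards onto their closest GL equivalents: glTextureBarrier resolves the read-after-write hazard of sampling a texture that is also the current render target, while GL_FRAMEBUFFER_BARRIER_BIT orders framebuffer writes against later framebuffer accesses. A usage sketch; the commented draw helpers are hypothetical:

    #include <glad/glad.h>

    // Order a draw that samples an attachment against the earlier draw that
    // rendered into it (GL 4.5 core / ARB_texture_barrier).
    void SampleAfterRender() {
        // DrawPassWritingAttachment();
        glTextureBarrier();   // TiledCacheBarrier(): texture RAW hazard
        // DrawPassSamplingAttachment();
    }

    // FragmentBarrier() uses the coarser memory barrier for framebuffer
    // reads and writes issued through other paths:
    void OrderFramebufferAccess() {
        glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
    }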
@@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() {
854 // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. 833 // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
855 num_queued_commands = 0; 834 num_queued_commands = 0;
856 835
836 fence_manager.TickFrame();
857 buffer_cache.TickFrame(); 837 buffer_cache.TickFrame();
838 {
839 auto lock = texture_cache.AcquireLock();
840 texture_cache.TickFrame();
841 }
858} 842}
859 843
860bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 844bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
861 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 845 const Tegra::Engines::Fermi2D::Surface& dst,
862 const Tegra::Engines::Fermi2D::Config& copy_config) { 846 const Tegra::Engines::Fermi2D::Config& copy_config) {
863 MICROPROFILE_SCOPE(OpenGL_Blits); 847 MICROPROFILE_SCOPE(OpenGL_Blits);
864 texture_cache.DoFermiCopy(src, dst, copy_config); 848 auto lock = texture_cache.AcquireLock();
849 texture_cache.BlitImage(dst, src, copy_config);
865 return true; 850 return true;
866} 851}
867 852
868bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, 853bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
869 VAddr framebuffer_addr, u32 pixel_stride) { 854 VAddr framebuffer_addr, u32 pixel_stride) {
870 if (!framebuffer_addr) { 855 if (framebuffer_addr == 0) {
871 return {}; 856 return false;
872 } 857 }
873
874 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 858 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
875 859
876 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; 860 auto lock = texture_cache.AcquireLock();
877 if (!surface) { 861 ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
878 return {}; 862 if (!image_view) {
863 return false;
879 } 864 }
880
881 // Verify that the cached surface is the same size and format as the requested framebuffer 865 // Verify that the cached surface is the same size and format as the requested framebuffer
882 const auto& params{surface->GetSurfaceParams()}; 866 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
883 const auto& pixel_format{ 867 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
884 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
885 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
886 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
887 868
888 if (params.pixel_format != pixel_format) { 869 screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
889 LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); 870 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
890 } 871 return true;
872}
891 873
892 screen_info.display_texture = surface->GetTexture(); 874void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
893 screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; 875 image_view_indices.clear();
876 sampler_handles.clear();
894 877
895 return true; 878 texture_cache.SynchronizeComputeDescriptors();
879
880 SetupComputeTextures(kernel);
881 SetupComputeImages(kernel);
882
883 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
884 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
885
886 program_manager.BindCompute(kernel->GetHandle());
887 size_t image_view_index = 0;
888 size_t texture_index = 0;
889 size_t image_index = 0;
890 BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
891}
892
893void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
894 GLuint base_image, size_t& image_view_index,
895 size_t& texture_index, size_t& image_index) {
896 const GLuint* const samplers = sampler_handles.data() + texture_index;
897 const GLuint* const textures = texture_handles.data() + texture_index;
898 const GLuint* const images = image_handles.data() + image_index;
899
900 const size_t num_samplers = entries.samplers.size();
901 for (const auto& sampler : entries.samplers) {
902 for (size_t i = 0; i < sampler.size; ++i) {
903 const ImageViewId image_view_id = image_view_ids[image_view_index++];
904 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
905 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
906 texture_handles[texture_index++] = handle;
907 }
908 }
909 const size_t num_images = entries.images.size();
910 for (size_t unit = 0; unit < num_images; ++unit) {
911 // TODO: Mark as modified
912 const ImageViewId image_view_id = image_view_ids[image_view_index++];
913 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
914 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
915 image_handles[image_index] = handle;
916 ++image_index;
917 }
918 if (num_samplers > 0) {
919 glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
920 glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
921 }
922 if (num_images > 0) {
923 glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
924 }
896} 925}
897 926
898void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { 927void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
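BindTextures above is built on ARB_multi_bind: one glBindSamplers/glBindTextures pair covers a whole contiguous range of texture units, and glBindImageTextures does the same for image units, replacing the per-entry glBindTextureUnit/glBindSampler calls of the deleted SetupTexture. A compile-ready fragment of the pattern (GL 4.4+ context assumed; zero handles in the arrays unbind their unit):

    #include <glad/glad.h>

    // Bind `count` consecutive sampler/texture units starting at first_unit,
    // and `image_count` image units starting at first_image: three driver
    // calls total, regardless of how many entries there are.
    void MultiBind(GLuint first_unit, GLsizei count, const GLuint* samplers,
                   const GLuint* textures, GLuint first_image,
                   GLsizei image_count, const GLuint* images) {
        glBindSamplers(first_unit, count, samplers);
        glBindTextures(first_unit, count, textures);
        glBindImageTextures(first_image, image_count, images);
    }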
@@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
999 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, 1028 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
1000 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, 1029 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
1001 }; 1030 };
1002
1003 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; 1031 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
1004 const auto& entries{shader->GetEntries().global_memory_entries}; 1032 const auto& entries{shader->GetEntries().global_memory_entries};
1005 1033
@@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
1056 } 1084 }
1057} 1085}
1058 1086
1059void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { 1087void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
1060 MICROPROFILE_SCOPE(OpenGL_Texture); 1088 const bool via_header_index =
1061 u32 binding = device.GetBaseBindings(stage_index).sampler; 1089 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1062 for (const auto& entry : shader->GetEntries().samplers) { 1090 for (const auto& entry : shader->GetEntries().samplers) {
1063 const auto shader_type = static_cast<ShaderType>(stage_index); 1091 const auto shader_type = static_cast<ShaderType>(stage_index);
1064 for (std::size_t i = 0; i < entry.size; ++i) { 1092 for (size_t index = 0; index < entry.size; ++index) {
1065 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); 1093 const auto handle =
1066 SetupTexture(binding++, texture, entry); 1094 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
1095 const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
1096 sampler_handles.push_back(sampler->Handle());
1097 image_view_indices.push_back(handle.image);
1067 } 1098 }
1068 } 1099 }
1069} 1100}
1070 1101
1071void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { 1102void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
1072 MICROPROFILE_SCOPE(OpenGL_Texture); 1103 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1073 u32 binding = 0;
1074 for (const auto& entry : kernel->GetEntries().samplers) { 1104 for (const auto& entry : kernel->GetEntries().samplers) {
1075 for (std::size_t i = 0; i < entry.size; ++i) { 1105 for (size_t i = 0; i < entry.size; ++i) {
1076 const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); 1106 const auto handle =
1077 SetupTexture(binding++, texture, entry); 1107 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
1108 const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
1109 sampler_handles.push_back(sampler->Handle());
1110 image_view_indices.push_back(handle.image);
1078 } 1111 }
1079 } 1112 }
1080} 1113}
1081 1114
1082void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 1115void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
1083 const SamplerEntry& entry) { 1116 const bool via_header_index =
1084 const auto view = texture_cache.GetTextureSurface(texture.tic, entry); 1117 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1085 if (!view) {
1086 // Can occur when texture addr is null or its memory is unmapped/invalid
1087 glBindSampler(binding, 0);
1088 glBindTextureUnit(binding, 0);
1089 return;
1090 }
1091 const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
1092 texture.tic.z_source, texture.tic.w_source);
1093 glBindTextureUnit(binding, handle);
1094 if (!view->GetSurfaceParams().IsBuffer()) {
1095 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
1096 }
1097}
1098
1099void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
1100 u32 binding = device.GetBaseBindings(stage_index).image;
1101 for (const auto& entry : shader->GetEntries().images) { 1118 for (const auto& entry : shader->GetEntries().images) {
1102 const auto shader_type = static_cast<ShaderType>(stage_index); 1119 const auto shader_type = static_cast<ShaderType>(stage_index);
1103 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; 1120 const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
1104 SetupImage(binding++, tic, entry); 1121 image_view_indices.push_back(handle.image);
1105 } 1122 }
1106} 1123}
1107 1124
1108void RasterizerOpenGL::SetupComputeImages(Shader* shader) { 1125void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
1109 u32 binding = 0; 1126 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1110 for (const auto& entry : shader->GetEntries().images) { 1127 for (const auto& entry : shader->GetEntries().images) {
1111 const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; 1128 const auto handle =
1112 SetupImage(binding++, tic, entry); 1129 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
1130 image_view_indices.push_back(handle.image);
1113 } 1131 }
1114} 1132}
1115 1133
1116void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
1117 const ImageEntry& entry) {
1118 const auto view = texture_cache.GetImageSurface(tic, entry);
1119 if (!view) {
1120 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
1121 return;
1122 }
1123 if (entry.is_written) {
1124 view->MarkAsModified(texture_cache.Tick());
1125 }
1126 const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1127 glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
1128}
1129
1130void RasterizerOpenGL::SyncViewport() { 1134void RasterizerOpenGL::SyncViewport() {
1131 auto& flags = maxwell3d.dirty.flags; 1135 auto& flags = maxwell3d.dirty.flags;
1132 const auto& regs = maxwell3d.regs; 1136 const auto& regs = maxwell3d.regs;
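Both engines now select between two sampler-descriptor indexing modes before gathering handles: graphics reads the Maxwell3D sampler_index register, compute reads the linked_tsc bit of the launch description, and the result is threaded into every GetTextureInfo call. A fragment of the selection; the register names follow the diff, but the wrapping helper is hypothetical and assumes the engine references are in scope:

    bool ViaHeaderIndex(bool is_compute) {
        if (is_compute) {
            return kepler_compute.launch_description.linked_tsc;
        }
        return maxwell3d.regs.sampler_index ==
               Maxwell::SamplerIndex::ViaHeaderIndex;
    }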
@@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
1526 flags[Dirty::PointSize] = false; 1530 flags[Dirty::PointSize] = false;
1527 1531
1528 oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); 1532 oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
1533 oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
1529 1534
1530 if (maxwell3d.regs.vp_point_size.enable) {
1531 // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
1532 glEnable(GL_PROGRAM_POINT_SIZE);
1533 return;
1534 }
1535
1536 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
1537 // in OpenGL).
1538 glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); 1535 glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
1539 glDisable(GL_PROGRAM_POINT_SIZE);
1540} 1536}
1541 1537
1542void RasterizerOpenGL::SyncLineState() { 1538void RasterizerOpenGL::SyncLineState() {
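The collapsed SyncPointState relies on how GL defines the two point-size sources: with GL_PROGRAM_POINT_SIZE enabled the shader's gl_PointSize wins and the fixed size is ignored, so both can be written unconditionally; the clamp keeps nouveau's occasional 0.0 from tripping GL_INVALID_VALUE. A sketch, with oglEnable assumed to be the file's existing enable/disable helper:

    #include <algorithm>
    #include <glad/glad.h>

    inline void oglEnable(GLenum cap, bool state) {
        (state ? glEnable : glDisable)(cap);
    }

    void SyncPointSizeSketch(bool program_point_size, float guest_point_size) {
        // If GL_PROGRAM_POINT_SIZE is enabled, glPointSize has no effect,
        // so setting both states branch-free is safe.
        oglEnable(GL_PROGRAM_POINT_SIZE, program_point_size);
        glPointSize(std::max(1.0f, guest_point_size));
    }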
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index de28cff15..82e03e677 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,12 +7,13 @@
7#include <array> 7#include <array>
8#include <atomic> 8#include <atomic>
9#include <cstddef> 9#include <cstddef>
10#include <map>
11#include <memory> 10#include <memory>
12#include <optional> 11#include <optional>
13#include <tuple> 12#include <tuple>
14#include <utility> 13#include <utility>
15 14
15#include <boost/container/static_vector.hpp>
16
16#include <glad/glad.h> 17#include <glad/glad.h>
17 18
18#include "common/common_types.h" 19#include "common/common_types.h"
@@ -23,16 +24,14 @@
23#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
24#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
25#include "video_core/renderer_opengl/gl_fence_manager.h" 26#include "video_core/renderer_opengl/gl_fence_manager.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_query_cache.h" 27#include "video_core/renderer_opengl/gl_query_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 28#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/renderer_opengl/gl_sampler_cache.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 29#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h" 30#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
33#include "video_core/renderer_opengl/gl_stream_buffer.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 34#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/renderer_opengl/utils.h"
36#include "video_core/shader/async_shaders.h" 35#include "video_core/shader/async_shaders.h"
37#include "video_core/textures/texture.h" 36#include "video_core/textures/texture.h"
38 37
@@ -51,7 +50,7 @@ class MemoryManager;
51namespace OpenGL { 50namespace OpenGL {
52 51
53struct ScreenInfo; 52struct ScreenInfo;
54struct DrawParameters; 53struct ShaderEntries;
55 54
56struct BindlessSSBO { 55struct BindlessSSBO {
57 GLuint64EXT address; 56 GLuint64EXT address;
@@ -79,15 +78,18 @@ public:
79 void InvalidateRegion(VAddr addr, u64 size) override; 78 void InvalidateRegion(VAddr addr, u64 size) override;
80 void OnCPUWrite(VAddr addr, u64 size) override; 79 void OnCPUWrite(VAddr addr, u64 size) override;
81 void SyncGuestHost() override; 80 void SyncGuestHost() override;
81 void UnmapMemory(VAddr addr, u64 size) override;
82 void SignalSemaphore(GPUVAddr addr, u32 value) override; 82 void SignalSemaphore(GPUVAddr addr, u32 value) override;
83 void SignalSyncPoint(u32 value) override; 83 void SignalSyncPoint(u32 value) override;
84 void ReleaseFences() override; 84 void ReleaseFences() override;
85 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 85 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
86 void WaitForIdle() override; 86 void WaitForIdle() override;
87 void FragmentBarrier() override;
88 void TiledCacheBarrier() override;
87 void FlushCommands() override; 89 void FlushCommands() override;
88 void TickFrame() override; 90 void TickFrame() override;
89 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 91 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
90 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 92 const Tegra::Engines::Fermi2D::Surface& dst,
91 const Tegra::Engines::Fermi2D::Config& copy_config) override; 93 const Tegra::Engines::Fermi2D::Config& copy_config) override;
92 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 94 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
93 u32 pixel_stride) override; 95 u32 pixel_stride) override;
@@ -108,11 +110,14 @@ public:
108 } 110 }
109 111
110private: 112private:
111 /// Configures the color and depth framebuffer states. 113 static constexpr size_t MAX_TEXTURES = 192;
112 void ConfigureFramebuffers(); 114 static constexpr size_t MAX_IMAGES = 48;
115 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
116
117 void BindComputeTextures(Shader* kernel);
113 118
114 /// Configures the color and depth framebuffer for clearing. 119 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
115 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); 120 size_t& image_view_index, size_t& texture_index, size_t& image_index);
116 121
117 /// Configures the current constbuffers to use for the draw command. 122 /// Configures the current constbuffers to use for the draw command.
118 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); 123 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
@@ -136,23 +141,16 @@ private:
136 size_t size, BindlessSSBO* ssbo); 141 size_t size, BindlessSSBO* ssbo);
137 142
138 /// Configures the current textures to use for the draw command. 143 /// Configures the current textures to use for the draw command.
139 void SetupDrawTextures(std::size_t stage_index, Shader* shader); 144 void SetupDrawTextures(const Shader* shader, size_t stage_index);
140 145
141 /// Configures the textures used in a compute shader. 146 /// Configures the textures used in a compute shader.
142 void SetupComputeTextures(Shader* kernel); 147 void SetupComputeTextures(const Shader* kernel);
143
144 /// Configures a texture.
145 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
146 const SamplerEntry& entry);
147 148
148 /// Configures images in a graphics shader. 149 /// Configures images in a graphics shader.
149 void SetupDrawImages(std::size_t stage_index, Shader* shader); 150 void SetupDrawImages(const Shader* shader, size_t stage_index);
150 151
151 /// Configures images in a compute shader. 152 /// Configures images in a compute shader.
152 void SetupComputeImages(Shader* shader); 153 void SetupComputeImages(const Shader* shader);
153
154 /// Configures an image.
155 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
156 154
157 /// Syncs the viewport and depth range to match the guest state 155 /// Syncs the viewport and depth range to match the guest state
158 void SyncViewport(); 156 void SyncViewport();
@@ -227,9 +225,6 @@ private:
227 /// End a transform feedback 225 /// End a transform feedback
228 void EndTransformFeedback(); 226 void EndTransformFeedback();
229 227
230 /// Check for extension that are not strictly required but are needed for correct emulation
231 void CheckExtensions();
232
233 std::size_t CalculateVertexArraysSize() const; 228 std::size_t CalculateVertexArraysSize() const;
234 229
235 std::size_t CalculateIndexBufferSize() const; 230 std::size_t CalculateIndexBufferSize() const;
@@ -242,7 +237,7 @@ private:
242 237
243 GLintptr SetupIndexBuffer(); 238 GLintptr SetupIndexBuffer();
244 239
245 void SetupShaders(GLenum primitive_mode); 240 void SetupShaders();
246 241
247 Tegra::GPU& gpu; 242 Tegra::GPU& gpu;
248 Tegra::Engines::Maxwell3D& maxwell3d; 243 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -254,19 +249,21 @@ private:
254 ProgramManager& program_manager; 249 ProgramManager& program_manager;
255 StateTracker& state_tracker; 250 StateTracker& state_tracker;
256 251
257 TextureCacheOpenGL texture_cache; 252 OGLStreamBuffer stream_buffer;
253 TextureCacheRuntime texture_cache_runtime;
254 TextureCache texture_cache;
258 ShaderCacheOpenGL shader_cache; 255 ShaderCacheOpenGL shader_cache;
259 SamplerCacheOpenGL sampler_cache;
260 FramebufferCacheOpenGL framebuffer_cache;
261 QueryCache query_cache; 256 QueryCache query_cache;
262 OGLBufferCache buffer_cache; 257 OGLBufferCache buffer_cache;
263 FenceManagerOpenGL fence_manager; 258 FenceManagerOpenGL fence_manager;
264 259
265 VideoCommon::Shader::AsyncShaders async_shaders; 260 VideoCommon::Shader::AsyncShaders async_shaders;
266 261
267 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 262 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
268 263 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
269 GLint vertex_binding = 0; 264 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
265 std::array<GLuint, MAX_TEXTURES> texture_handles;
266 std::array<GLuint, MAX_IMAGES> image_handles;
270 267
271 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> 268 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
272 transform_feedback_buffers; 269 transform_feedback_buffers;
@@ -280,7 +277,7 @@ private:
280 std::size_t current_cbuf = 0; 277 std::size_t current_cbuf = 0;
281 OGLBuffer unified_uniform_buffer; 278 OGLBuffer unified_uniform_buffer;
282 279
283 /// Number of commands queued to the OpenGL driver. Reseted on flush. 280 /// Number of commands queued to the OpenGL driver. Reset on flush.
284 std::size_t num_queued_commands = 0; 281 std::size_t num_queued_commands = 0;
285 282
286 u32 last_clip_distance_mask = 0; 283 u32 last_clip_distance_mask = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0ebcec427..0e34a0f20 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -71,7 +71,7 @@ void OGLSampler::Create() {
71 return; 71 return;
72 72
73 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 73 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
74 glGenSamplers(1, &handle); 74 glCreateSamplers(1, &handle);
75} 75}
76 76
77void OGLSampler::Release() { 77void OGLSampler::Release() {
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
deleted file mode 100644
index 5c174879a..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "video_core/renderer_opengl/gl_resource_manager.h"
7#include "video_core/renderer_opengl/gl_sampler_cache.h"
8#include "video_core/renderer_opengl/maxwell_to_gl.h"
9
10namespace OpenGL {
11
12SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
13
14SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
15
16OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
17 OGLSampler sampler;
18 sampler.Create();
19
20 const GLuint sampler_id{sampler.handle};
21 glSamplerParameteri(
22 sampler_id, GL_TEXTURE_MAG_FILTER,
23 MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
24 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
25 MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
26 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
27 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
28 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
29 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
30 tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
31 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
32 MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
33 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
34 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
35 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
36 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
37 if (GLAD_GL_ARB_texture_filter_anisotropic) {
38 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
39 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
40 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
41 } else {
42 LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
43 }
44
45 return sampler;
46}
47
48GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
49 return sampler.handle;
50}
51
52} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
deleted file mode 100644
index 34ee37f00..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ /dev/null
@@ -1,25 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <glad/glad.h>
8
9#include "video_core/renderer_opengl/gl_resource_manager.h"
10#include "video_core/sampler_cache.h"
11
12namespace OpenGL {
13
14class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
15public:
16 explicit SamplerCacheOpenGL();
17 ~SamplerCacheOpenGL();
18
19protected:
20 OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
21
22 GLuint ToSamplerType(const OGLSampler& sampler) const override;
23};
24
25} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index eabfdea5d..d4841fdb7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -27,7 +27,6 @@
27#include "video_core/renderer_opengl/gl_shader_decompiler.h" 27#include "video_core/renderer_opengl/gl_shader_decompiler.h"
28#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h" 29#include "video_core/renderer_opengl/gl_state_tracker.h"
30#include "video_core/renderer_opengl/utils.h"
31#include "video_core/shader/memory_util.h" 30#include "video_core/shader/memory_util.h"
32#include "video_core/shader/registry.h" 31#include "video_core/shader/registry.h"
33#include "video_core/shader/shader_ir.h" 32#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ccbdfe967..2e1fa252d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap; 38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using Tegra::Shader::TextureType; 40using Tegra::Shader::TextureType;
41using VideoCommon::Shader::BuildTransformFeedback;
42using VideoCommon::Shader::Registry;
43 41
44using namespace std::string_literals;
45using namespace VideoCommon::Shader; 42using namespace VideoCommon::Shader;
43using namespace std::string_literals;
46 44
47using Maxwell = Tegra::Engines::Maxwell3D::Regs; 45using Maxwell = Tegra::Engines::Maxwell3D::Regs;
48using Operation = const OperationNode&; 46using Operation = const OperationNode&;
@@ -2753,11 +2751,11 @@ private:
2753 } 2751 }
2754 } 2752 }
2755 2753
2756 std::string GetSampler(const Sampler& sampler) const { 2754 std::string GetSampler(const SamplerEntry& sampler) const {
2757 return AppendSuffix(sampler.index, "sampler"); 2755 return AppendSuffix(sampler.index, "sampler");
2758 } 2756 }
2759 2757
2760 std::string GetImage(const Image& image) const { 2758 std::string GetImage(const ImageEntry& image) const {
2761 return AppendSuffix(image.index, "image"); 2759 return AppendSuffix(image.index, "image");
2762 } 2760 }
2763 2761
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index c4ff47875..be68994bb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,8 +20,8 @@ namespace OpenGL {
20class Device; 20class Device;
21 21
22using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23using SamplerEntry = VideoCommon::Shader::Sampler; 23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using ImageEntry = VideoCommon::Shader::Image; 24using ImageEntry = VideoCommon::Shader::ImageEntry;
25 25
26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { 26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
27public: 27public:
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 691c6c79b..553e6e8d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
83 } 83 }
84} 84}
85 85
86void ProgramManager::BindHostCompute(GLuint program) {
87 if (use_assembly_programs) {
88 glDisable(GL_COMPUTE_PROGRAM_NV);
89 }
90 glUseProgram(program);
91 is_graphics_bound = false;
92}
93
94void ProgramManager::RestoreGuestCompute() {
95 if (use_assembly_programs) {
96 glEnable(GL_COMPUTE_PROGRAM_NV);
97 glUseProgram(0);
98 }
99}
100
86void ProgramManager::UseVertexShader(GLuint program) { 101void ProgramManager::UseVertexShader(GLuint program) {
87 if (use_assembly_programs) { 102 if (use_assembly_programs) {
88 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); 103 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 950e0dfcb..ad42cce74 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,6 +45,12 @@ public:
45 /// Rewinds BindHostPipeline state changes. 45 /// Rewinds BindHostPipeline state changes.
46 void RestoreGuestPipeline(); 46 void RestoreGuestPipeline();
47 47
48 /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
49 void BindHostCompute(GLuint program);
50
51 /// Rewinds BindHostCompute state changes.
52 void RestoreGuestCompute();
53
48 void UseVertexShader(GLuint program); 54 void UseVertexShader(GLuint program);
49 void UseGeometryShader(GLuint program); 55 void UseGeometryShader(GLuint program);
50 void UseFragmentShader(GLuint program); 56 void UseFragmentShader(GLuint program);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 45f4fc565..60e6fa39f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
249 } 249 }
250} 250}
251 251
252void StateTracker::InvalidateStreamBuffer() {
253 flags[Dirty::VertexBuffers] = true;
254 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
255 flags[index] = true;
256 }
257}
258
252} // namespace OpenGL 259} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 9d127548f..574615d3c 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -92,6 +92,8 @@ class StateTracker {
92public: 92public:
93 explicit StateTracker(Tegra::GPU& gpu); 93 explicit StateTracker(Tegra::GPU& gpu);
94 94
95 void InvalidateStreamBuffer();
96
95 void BindIndexBuffer(GLuint new_index_buffer) { 97 void BindIndexBuffer(GLuint new_index_buffer) {
96 if (index_buffer == new_index_buffer) { 98 if (index_buffer == new_index_buffer) {
97 return; 99 return;
@@ -100,6 +102,14 @@ public:
100 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); 102 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
101 } 103 }
102 104
105 void BindFramebuffer(GLuint new_framebuffer) {
106 if (framebuffer == new_framebuffer) {
107 return;
108 }
109 framebuffer = new_framebuffer;
110 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
111 }
112
103 void NotifyScreenDrawVertexArray() { 113 void NotifyScreenDrawVertexArray() {
104 flags[OpenGL::Dirty::VertexFormats] = true; 114 flags[OpenGL::Dirty::VertexFormats] = true;
105 flags[OpenGL::Dirty::VertexFormat0 + 0] = true; 115 flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
@@ -129,9 +139,9 @@ public:
129 flags[OpenGL::Dirty::Scissor0] = true; 139 flags[OpenGL::Dirty::Scissor0] = true;
130 } 140 }
131 141
132 void NotifyColorMask0() { 142 void NotifyColorMask(size_t index) {
133 flags[OpenGL::Dirty::ColorMasks] = true; 143 flags[OpenGL::Dirty::ColorMasks] = true;
134 flags[OpenGL::Dirty::ColorMask0] = true; 144 flags[OpenGL::Dirty::ColorMask0 + index] = true;
135 } 145 }
136 146
137 void NotifyBlend0() { 147 void NotifyBlend0() {
@@ -190,6 +200,7 @@ public:
190private: 200private:
191 Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; 201 Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
192 202
203 GLuint framebuffer = 0;
193 GLuint index_buffer = 0; 204 GLuint index_buffer = 0;
194}; 205};
195 206
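BindFramebuffer in the tracker is the same last-handle shadowing that BindIndexBuffer already uses: remember the handle, skip the driver call when it repeats. A self-contained sketch of the pattern (hypothetical class name); the caveat is in the comment:

    #include <glad/glad.h>

    class FramebufferBindCache {
    public:
        void Bind(GLuint handle) {
            if (framebuffer == handle) {
                return; // elide the redundant driver call
            }
            framebuffer = handle;
            glBindFramebuffer(GL_DRAW_FRAMEBUFFER, handle);
        }

    private:
        // All framebuffer binds must go through this wrapper; a bind issued
        // elsewhere leaves this shadow value stale and a later real bind
        // would be skipped. 0 is GL's default framebuffer.
        GLuint framebuffer = 0;
    };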
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 887995cf4..e0819cdf2 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,6 +9,7 @@
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_device.h" 11#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
12#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
13 14
14MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 15MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
16 17
17namespace OpenGL { 18namespace OpenGL {
18 19
19OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) 20OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
20 : buffer_size(size) { 21 : state_tracker{state_tracker_} {
21 gl_buffer.Create(); 22 gl_buffer.Create();
22 23
23 GLsizeiptr allocate_size = size;
24 if (vertex_data_usage) {
25 // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
26 // read position is near the end and is an out-of-bound access to the vertex buffer. This is
27 // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
28 // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
29 // crash.
30 allocate_size *= 2;
31 }
32
33 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; 24 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
34 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); 25 glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
35 mapped_ptr = static_cast<u8*>( 26 mapped_ptr = static_cast<u8*>(
36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); 27 glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
37 28
38 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { 29 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); 30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
@@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
46 gl_buffer.Release(); 37 gl_buffer.Release();
47} 38}
48 39
49std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { 40std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
50 ASSERT(size <= buffer_size); 41 ASSERT(size <= BUFFER_SIZE);
51 ASSERT(alignment <= buffer_size); 42 ASSERT(alignment <= BUFFER_SIZE);
52 mapped_size = size; 43 mapped_size = size;
53 44
54 if (alignment > 0) { 45 if (alignment > 0) {
55 buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); 46 buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
56 } 47 }
57 48
58 bool invalidate = false; 49 if (buffer_pos + size > BUFFER_SIZE) {
59 if (buffer_pos + size > buffer_size) {
60 MICROPROFILE_SCOPE(OpenGL_StreamBuffer); 50 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
61 glInvalidateBufferData(gl_buffer.handle); 51 glInvalidateBufferData(gl_buffer.handle);
52 state_tracker.InvalidateStreamBuffer();
62 53
63 buffer_pos = 0; 54 buffer_pos = 0;
64 invalidate = true;
65 } 55 }
66 56
67 return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); 57 return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
68} 58}
69 59
70void OGLStreamBuffer::Unmap(GLsizeiptr size) { 60void OGLStreamBuffer::Unmap(GLsizeiptr size) {
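With the invalidation flag dropped from Map's return value (the stream buffer now notifies the state tracker directly), the remaining logic is a plain ring allocator: align the cursor, orphan and rewind on overflow, hand out a pointer plus offset. A self-contained sketch without the GL calls; AlignUp is a local stand-in for Common::AlignUp:

    #include <cstddef>
    #include <cstdint>
    #include <utility>

    constexpr std::size_t BUFFER_SIZE = 256 * 1024 * 1024;

    constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
        return (value + align - 1) / align * align;
    }

    struct StreamRing {
        std::size_t pos = 0;
        std::uint8_t* base = nullptr; // persistently mapped pointer

        // Returns {write pointer, buffer offset}; rewinds on overflow.
        std::pair<std::uint8_t*, std::size_t> Map(std::size_t size,
                                                  std::size_t align) {
            if (align > 0) {
                pos = AlignUp(pos, align);
            }
            if (pos + size > BUFFER_SIZE) {
                // The real code calls glInvalidateBufferData and
                // state_tracker.InvalidateStreamBuffer() before rewinding.
                pos = 0;
            }
            return {base + pos, pos};
        }
    };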
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 307a67113..dd9cf67eb 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -4,29 +4,31 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <tuple> 7#include <utility>
8
8#include <glad/glad.h> 9#include <glad/glad.h>
10
9#include "common/common_types.h" 11#include "common/common_types.h"
10#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
11 13
12namespace OpenGL { 14namespace OpenGL {
13 15
14class Device; 16class Device;
17class StateTracker;
15 18
16class OGLStreamBuffer : private NonCopyable { 19class OGLStreamBuffer : private NonCopyable {
17public: 20public:
18 explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); 21 explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
19 ~OGLStreamBuffer(); 22 ~OGLStreamBuffer();
20 23
21 /* 24 /*
22 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes 25 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
23 * and the optional alignment requirement. 26 * and the optional alignment requirement.
24 * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. 27 * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
25 * The return values are the pointer to the new chunk, the offset within the buffer, 28 * The return values are the pointer to the new chunk, and the offset within the buffer.
26 * and the invalidation flag for previous chunks.
27 * The actual used size must be specified on unmapping the chunk. 29 * The actual used size must be specified on unmapping the chunk.
28 */ 30 */
29 std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); 31 std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
30 32
31 void Unmap(GLsizeiptr size); 33 void Unmap(GLsizeiptr size);
32 34
@@ -39,15 +41,18 @@ public:
39 } 41 }
40 42
41 GLsizeiptr Size() const noexcept { 43 GLsizeiptr Size() const noexcept {
42 return buffer_size; 44 return BUFFER_SIZE;
43 } 45 }
44 46
45private: 47private:
48 static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
49
50 StateTracker& state_tracker;
51
46 OGLBuffer gl_buffer; 52 OGLBuffer gl_buffer;
47 53
48 GLuint64EXT gpu_address = 0; 54 GLuint64EXT gpu_address = 0;
49 GLintptr buffer_pos = 0; 55 GLintptr buffer_pos = 0;
50 GLsizeiptr buffer_size = 0;
51 GLsizeiptr mapped_size = 0; 56 GLsizeiptr mapped_size = 0;
52 u8* mapped_ptr = nullptr; 57 u8* mapped_ptr = nullptr;
53}; 58};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index daf352b50..546cb6d00 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -2,37 +2,60 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/assert.h"
-#include "common/bit_util.h"
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "common/scope_exit.h"
-#include "core/core.h"
-#include "video_core/morton.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <string>
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
-#include "video_core/texture_cache/surface_base.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/samples_helper.h"
 #include "video_core/texture_cache/texture_cache.h"
-#include "video_core/textures/convert.h"
-#include "video_core/textures/texture.h"
+#include "video_core/textures/decoders.h"
 
 namespace OpenGL {
 
-using Tegra::Texture::SwizzleSource;
-using VideoCore::MortonSwizzleMode;
+namespace {
 
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TextureMipmapFilter;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCommon::CalculateLevelStrideAlignment;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageFlagBits;
+using VideoCommon::ImageType;
+using VideoCommon::NUM_RT;
+using VideoCommon::SamplesLog2;
+using VideoCommon::SwizzleParameters;
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsPixelFormatSRGB;
+using VideoCore::Surface::MaxPixelFormat;
 using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
 using VideoCore::Surface::SurfaceType;
 
-MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
-                    MP_RGB(128, 192, 128));
+struct CopyOrigin {
+    GLint level;
+    GLint x;
+    GLint y;
+    GLint z;
+};
 
-namespace {
+struct CopyRegion {
+    GLsizei width;
+    GLsizei height;
+    GLsizei depth;
+};
 
 struct FormatTuple {
     GLenum internal_format;
@@ -40,7 +63,7 @@ struct FormatTuple {
     GLenum type = GL_NONE;
 };
 
-constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
     {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},               // A8B8G8R8_SNORM
     {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},            // A8B8G8R8_SINT
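FORMAT_TABLE is indexed directly by PixelFormat, so the table order must stay in lockstep with the enum declaration order. A one-line guard in that spirit (not present in the patch itself) would catch a mismatch at compile time:

    // Sketch: fail the build if the table and the enum drift apart.
    static_assert(FORMAT_TABLE.size() == MaxPixelFormat);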
@@ -103,72 +126,113 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
     {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                             // ASTC_2D_8X5_UNORM
     {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                             // ASTC_2D_5X4_UNORM
     {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE},                  // B8G8R8A8_UNORM
-    // Compressed sRGB formats
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                      // BC1_RGBA_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                      // BC2_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                      // BC3_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                         // BC7_SRGB
-    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},            // A4B4G4R4_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                     // ASTC_2D_4X4_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                     // ASTC_2D_8X8_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                     // ASTC_2D_8X5_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                     // ASTC_2D_5X4_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                             // ASTC_2D_5X5_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                     // ASTC_2D_5X5_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                            // ASTC_2D_10X8_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                    // ASTC_2D_10X8_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                             // ASTC_2D_6X6_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                     // ASTC_2D_6X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                           // ASTC_2D_10X10_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                   // ASTC_2D_10X10_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                           // ASTC_2D_12X12_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                   // ASTC_2D_12X12_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                             // ASTC_2D_8X6_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                     // ASTC_2D_8X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                             // ASTC_2D_6X5_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                     // ASTC_2D_6X5_SRGB
-    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},             // E5B9G9R9_FLOAT
-
-    // Depth formats
-    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},         // D32_FLOAT
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
-
-    // DepthStencil formats
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                      // BC1_RGBA_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                      // BC2_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                      // BC3_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                         // BC7_SRGB
+    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},            // A4B4G4R4_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                     // ASTC_2D_4X4_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                     // ASTC_2D_8X8_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                     // ASTC_2D_8X5_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                     // ASTC_2D_5X4_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                             // ASTC_2D_5X5_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                     // ASTC_2D_5X5_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                            // ASTC_2D_10X8_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                    // ASTC_2D_10X8_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                             // ASTC_2D_6X6_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                     // ASTC_2D_6X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                           // ASTC_2D_10X10_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                   // ASTC_2D_10X10_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                           // ASTC_2D_12X12_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                   // ASTC_2D_12X12_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                             // ASTC_2D_8X6_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                     // ASTC_2D_8X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                             // ASTC_2D_6X5_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                     // ASTC_2D_6X5_SRGB
+    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},             // E5B9G9R9_FLOAT
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},         // D32_FLOAT
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
     {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
      GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
 }};
 
+constexpr std::array ACCELERATED_FORMATS{
+    GL_RGBA32F,   GL_RGBA16F,   GL_RG32F,    GL_RG16F,        GL_R11F_G11F_B10F, GL_R32F,
+    GL_R16F,      GL_RGBA32UI,  GL_RGBA16UI, GL_RGB10_A2UI,   GL_RGBA8UI,        GL_RG32UI,
+    GL_RG16UI,    GL_RG8UI,     GL_R32UI,    GL_R16UI,        GL_R8UI,           GL_RGBA32I,
+    GL_RGBA16I,   GL_RGBA8I,    GL_RG32I,    GL_RG16I,        GL_RG8I,           GL_R32I,
+    GL_R16I,      GL_R8I,       GL_RGBA16,   GL_RGB10_A2,     GL_RGBA8,          GL_RG16,
+    GL_RG8,       GL_R16,       GL_R8,       GL_RGBA16_SNORM, GL_RGBA8_SNORM,    GL_RG16_SNORM,
+    GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
+};
+
 const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
-    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
-    return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+    ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+    return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
 }
 
-GLenum GetTextureTarget(const SurfaceTarget& target) {
-    switch (target) {
-    case SurfaceTarget::TextureBuffer:
+GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
+    switch (info.type) {
+    case ImageType::e1D:
+        return GL_TEXTURE_1D_ARRAY;
+    case ImageType::e2D:
+        if (info.num_samples > 1) {
+            return GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
+        }
+        return GL_TEXTURE_2D_ARRAY;
+    case ImageType::e3D:
+        return GL_TEXTURE_3D;
+    case ImageType::Linear:
+        return GL_TEXTURE_2D_ARRAY;
+    case ImageType::Buffer:
         return GL_TEXTURE_BUFFER;
-    case SurfaceTarget::Texture1D:
+    }
+    UNREACHABLE_MSG("Invalid image type={}", info.type);
+    return GL_NONE;
+}
+
+GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+    const bool is_multisampled = num_samples > 1;
+    switch (type) {
+    case ImageViewType::e1D:
         return GL_TEXTURE_1D;
-    case SurfaceTarget::Texture2D:
-        return GL_TEXTURE_2D;
-    case SurfaceTarget::Texture3D:
+    case ImageViewType::e2D:
+        return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+    case ImageViewType::Cube:
+        return GL_TEXTURE_CUBE_MAP;
+    case ImageViewType::e3D:
         return GL_TEXTURE_3D;
-    case SurfaceTarget::Texture1DArray:
+    case ImageViewType::e1DArray:
         return GL_TEXTURE_1D_ARRAY;
-    case SurfaceTarget::Texture2DArray:
-        return GL_TEXTURE_2D_ARRAY;
-    case SurfaceTarget::TextureCubemap:
-        return GL_TEXTURE_CUBE_MAP;
-    case SurfaceTarget::TextureCubeArray:
+    case ImageViewType::e2DArray:
+        return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
+    case ImageViewType::CubeArray:
         return GL_TEXTURE_CUBE_MAP_ARRAY;
+    case ImageViewType::Rect:
+        return GL_TEXTURE_RECTANGLE;
+    case ImageViewType::Buffer:
+        return GL_TEXTURE_BUFFER;
     }
-    UNREACHABLE();
-    return {};
+    UNREACHABLE_MSG("Invalid image view type={}", type);
+    return GL_NONE;
 }
 
-GLint GetSwizzleSource(SwizzleSource source) {
+GLenum TextureMode(PixelFormat format, bool is_first) {
+    switch (format) {
+    case PixelFormat::D24_UNORM_S8_UINT:
+    case PixelFormat::D32_FLOAT_S8_UINT:
+        return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+    case PixelFormat::S8_UINT_D24_UNORM:
+        return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+    default:
+        UNREACHABLE();
+        return GL_DEPTH_COMPONENT;
+    }
+}
+
+GLint Swizzle(SwizzleSource source) {
     switch (source) {
     case SwizzleSource::Zero:
         return GL_ZERO;
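Note the shape of the two ImageTarget overloads above: storage for 1D and 2D images is always allocated with the array targets (GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY), and only views select the concrete non-array target. A minimal usage sketch under that assumption, outside this patch:

    // Sketch: allocate as an array target, then view one layer as plain 2D.
    GLuint storage = 0;
    glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &storage);
    glTextureStorage3D(storage, 1, GL_RGBA8, 64, 64, 1);

    GLuint view = 0;
    glGenTextures(1, &view); // glTextureView requires a generated, never-bound name
    glTextureView(view, GL_TEXTURE_2D, storage, GL_RGBA8, 0, 1, 0, 1);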
@@ -184,530 +248,813 @@ GLint GetSwizzleSource(SwizzleSource source) {
     case SwizzleSource::OneFloat:
         return GL_ONE;
     }
-    UNREACHABLE();
+    UNREACHABLE_MSG("Invalid swizzle source={}", source);
     return GL_NONE;
 }
 
-GLenum GetComponent(PixelFormat format, bool is_first) {
-    switch (format) {
-    case PixelFormat::D24_UNORM_S8_UINT:
-    case PixelFormat::D32_FLOAT_S8_UINT:
-        return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
-    case PixelFormat::S8_UINT_D24_UNORM:
-        return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+GLenum AttachmentType(PixelFormat format) {
+    switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
+    case SurfaceType::Depth:
+        return GL_DEPTH_ATTACHMENT;
+    case SurfaceType::DepthStencil:
+        return GL_DEPTH_STENCIL_ATTACHMENT;
     default:
-        UNREACHABLE();
-        return GL_DEPTH_COMPONENT;
+        UNIMPLEMENTED_MSG("Unimplemented type={}", type);
+        return GL_NONE;
     }
 }
 
-void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
-    if (params.IsBuffer()) {
-        return;
+[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) {
+    if (!device.HasASTC() && IsPixelFormatASTC(format)) {
+        return true;
     }
-    glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1));
-    if (params.num_levels == 1) {
-        glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
+    switch (format) {
+    case PixelFormat::BC4_UNORM:
+    case PixelFormat::BC5_UNORM:
+        return type == ImageType::e3D;
+    default:
+        break;
     }
+    return false;
 }
 
-OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
-                         OGLBuffer& texture_buffer) {
-    OGLTexture texture;
-    texture.Create(target);
+[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
+    switch (value) {
+    case SwizzleSource::G:
+        return SwizzleSource::R;
+    default:
+        return value;
+    }
+}
 
-    switch (params.target) {
-    case SurfaceTarget::Texture1D:
-        glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
-        break;
-    case SurfaceTarget::TextureBuffer:
-        texture_buffer.Create();
-        glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
-                             nullptr, GL_DYNAMIC_STORAGE_BIT);
-        glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
+void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
+    switch (format) {
+    case PixelFormat::D24_UNORM_S8_UINT:
+    case PixelFormat::D32_FLOAT_S8_UINT:
+    case PixelFormat::S8_UINT_D24_UNORM:
+        UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
+        glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
+                            TextureMode(format, swizzle[0] == SwizzleSource::R));
+        std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
         break;
-    case SurfaceTarget::Texture2D:
-    case SurfaceTarget::TextureCubemap:
-        glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
-                           params.height);
+    default:
         break;
-    case SurfaceTarget::Texture3D:
-    case SurfaceTarget::Texture2DArray:
-    case SurfaceTarget::TextureCubeArray:
-        glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
-                           params.height, params.depth);
+    }
+    std::array<GLint, 4> gl_swizzle;
+    std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle);
+    glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
+}
+
+[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
+                                    const VideoCommon::ImageInfo& info) {
+    // Disable accelerated uploads for now as they don't implement swizzled uploads
+    return false;
+    switch (info.type) {
+    case ImageType::e2D:
+    case ImageType::e3D:
+    case ImageType::Linear:
         break;
     default:
-        UNREACHABLE();
+        return false;
+    }
+    const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+    const auto& format_info = runtime.FormatInfo(info.type, internal_format);
+    if (format_info.is_compressed) {
+        return false;
+    }
+    if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+        return false;
     }
+    if (format_info.compatibility_by_size) {
+        return true;
+    }
+    const GLenum store_format = StoreFormat(BytesPerBlock(info.format));
+    const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class;
+    return format_info.compatibility_class == store_class;
+}
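CanBeAccelerated deliberately returns false before any checks run; the remainder is kept as the intended logic for when swizzled compute uploads land. Its size-compatibility fallback compares the format's image class against the class of an unsigned-integer "store" format with the same texel size. The real StoreFormat helper is defined elsewhere in this change; a purely illustrative mirror of the idea:

    // Sketch: pick an integer format with matching bytes-per-block.
    constexpr GLenum StoreFormatForBytes(unsigned bytes_per_block) {
        switch (bytes_per_block) {
        case 1:  return GL_R8UI;
        case 2:  return GL_R16UI;
        case 4:  return GL_R32UI;
        case 8:  return GL_RG32UI;
        case 16: return GL_RGBA32UI;
        default: return GL_NONE;
        }
    }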
 
-    ApplyTextureDefaults(params, texture.handle);
+[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
+                                        VideoCommon::SubresourceLayers subresource, GLenum target) {
+    switch (target) {
+    case GL_TEXTURE_2D_ARRAY:
+    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+        return CopyOrigin{
+            .level = static_cast<GLint>(subresource.base_level),
+            .x = static_cast<GLint>(offset.x),
+            .y = static_cast<GLint>(offset.y),
+            .z = static_cast<GLint>(subresource.base_layer),
+        };
+    case GL_TEXTURE_3D:
+        return CopyOrigin{
+            .level = static_cast<GLint>(subresource.base_level),
+            .x = static_cast<GLint>(offset.x),
+            .y = static_cast<GLint>(offset.y),
+            .z = static_cast<GLint>(offset.z),
+        };
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+        return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0};
+    }
+}
 
-    return texture;
+[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent,
+                                        VideoCommon::SubresourceLayers dst_subresource,
+                                        GLenum target) {
+    switch (target) {
+    case GL_TEXTURE_2D_ARRAY:
+    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+        return CopyRegion{
+            .width = static_cast<GLsizei>(extent.width),
+            .height = static_cast<GLsizei>(extent.height),
+            .depth = static_cast<GLsizei>(dst_subresource.num_layers),
+        };
+    case GL_TEXTURE_3D:
+        return CopyRegion{
+            .width = static_cast<GLsizei>(extent.width),
+            .height = static_cast<GLsizei>(extent.height),
+            .depth = static_cast<GLsizei>(extent.depth),
+        };
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
+        return CopyRegion{.width = 0, .height = 0, .depth = 0};
+    }
 }
 
-constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
-                            SwizzleSource w_source) {
-    return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
-           (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
+void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
+    if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
+        const GLuint texture = image_view->DefaultHandle();
+        glNamedFramebufferTexture(fbo, attachment, texture, 0);
+        return;
+    }
+    const GLuint texture = image_view->Handle(ImageViewType::e3D);
+    if (image_view->range.extent.layers > 1) {
+        // TODO: OpenGL doesn't support rendering to a fixed number of slices
+        glNamedFramebufferTexture(fbo, attachment, texture, 0);
+    } else {
+        const u32 slice = image_view->range.base.layer;
+        glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice);
+    }
 }
 
 } // Anonymous namespace
 
-CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_,
-                             bool is_astc_supported_)
-    : SurfaceBase<View>{gpu_addr_, params_, is_astc_supported_} {
-    if (is_converted) {
-        internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
-        format = GL_RGBA;
-        type = GL_UNSIGNED_BYTE;
-    } else {
-        const auto& tuple{GetFormatTuple(params.pixel_format)};
-        internal_format = tuple.internal_format;
-        format = tuple.format;
-        type = tuple.type;
-        is_compressed = params.IsCompressed();
-    }
-    target = GetTextureTarget(params.target);
-    texture = CreateTexture(params, target, internal_format, texture_buffer);
-    DecorateSurfaceName();
+ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
+    : span(map, size), sync{sync_}, handle{handle_} {}
 
-    u32 num_layers = 1;
-    if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
-        num_layers = params.depth;
+ImageBufferMap::~ImageBufferMap() {
+    if (sync) {
+        sync->Create();
     }
-
-    main_view =
-        CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
 }
 
-CachedSurface::~CachedSurface() = default;
+TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
+                                         StateTracker& state_tracker_)
+    : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
+    static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
+    for (size_t i = 0; i < TARGETS.size(); ++i) {
+        const GLenum target = TARGETS[i];
+        for (const FormatTuple& tuple : FORMAT_TABLE) {
+            const GLenum format = tuple.internal_format;
+            GLint compat_class;
+            GLint compat_type;
+            GLint is_compressed;
+            glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class);
+            glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1,
+                                  &compat_type);
+            glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed);
+            const FormatProperties properties{
+                .compatibility_class = static_cast<GLenum>(compat_class),
+                .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE,
+                .is_compressed = is_compressed == GL_TRUE,
+            };
+            format_properties[i].emplace(format, properties);
+        }
+    }
+    has_broken_texture_view_formats = device.HasBrokenTextureViewFormats();
+
+    null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
+    null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
+    null_image_3d.Create(GL_TEXTURE_3D);
+    null_image_rect.Create(GL_TEXTURE_RECTANGLE);
+    glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
+    glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
+    glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
+    glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
+
+    std::array<GLuint, 4> new_handles;
+    glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
+    null_image_view_1d.handle = new_handles[0];
+    null_image_view_2d.handle = new_handles[1];
+    null_image_view_2d_array.handle = new_handles[2];
+    null_image_view_cube.handle = new_handles[3];
+    glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1,
+                  0, 1);
+    glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0,
+                  1, 0, 1);
+    glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY,
+                  null_image_cube_array.handle, GL_R8, 0, 1, 0, 1);
+    glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
+                  GL_R8, 0, 1, 0, 6);
+    const std::array texture_handles{
+        null_image_1d_array.handle,      null_image_cube_array.handle, null_image_3d.handle,
+        null_image_rect.handle,          null_image_view_1d.handle,    null_image_view_2d.handle,
+        null_image_view_2d_array.handle, null_image_view_cube.handle,
+    };
+    for (const GLuint handle : texture_handles) {
+        static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
+        glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
+    }
+    const auto set_view = [this](ImageViewType type, GLuint handle) {
+        if (device.HasDebuggingToolAttached()) {
+            const std::string name = fmt::format("NullImage {}", type);
+            glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+        }
+        null_image_views[static_cast<size_t>(type)] = handle;
+    };
+    set_view(ImageViewType::e1D, null_image_view_1d.handle);
+    set_view(ImageViewType::e2D, null_image_view_2d.handle);
+    set_view(ImageViewType::Cube, null_image_view_cube.handle);
+    set_view(ImageViewType::e3D, null_image_3d.handle);
+    set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
+    set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
+    set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
+    set_view(ImageViewType::Rect, null_image_rect.handle);
+}
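The constructor above caches three GL_IMAGE_* properties per (target, internal format) pair so that later FormatInfo lookups never touch the driver. Standalone, one such query looks like this:

    // Sketch: ask the driver how GL_RGBA8 behaves for image load/store on 2D arrays.
    GLint compat_class = GL_NONE;
    glGetInternalformativ(GL_TEXTURE_2D_ARRAY, GL_RGBA8, GL_IMAGE_COMPATIBILITY_CLASS, 1,
                          &compat_class);
    // e.g. GL_IMAGE_CLASS_4_X_8 on drivers implementing ARB_internalformat_query2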
 
-void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
-    MICROPROFILE_SCOPE(OpenGL_Texture_Download);
+TextureCacheRuntime::~TextureCacheRuntime() = default;
 
-    if (params.IsBuffer()) {
-        glGetNamedBufferSubData(texture_buffer.handle, 0,
-                                static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
-                                staging_buffer.data());
-        return;
-    }
+void TextureCacheRuntime::Finish() {
+    glFinish();
+}
 
-    SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
+ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
+    return upload_buffers.RequestMap(size, true);
+}
 
-    for (u32 level = 0; level < params.emulated_levels; ++level) {
-        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
-        glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
-        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
+ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
+    return download_buffers.RequestMap(size, false);
+}
 
-        u8* const mip_data = staging_buffer.data() + mip_offset;
-        const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
-        if (is_compressed) {
-            glGetCompressedTextureImage(texture.handle, level, size, mip_data);
-        } else {
-            glGetTextureImage(texture.handle, level, format, type, size, mip_data);
-        }
+void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
+                                    std::span<const ImageCopy> copies) {
+    const GLuint dst_name = dst_image.Handle();
+    const GLuint src_name = src_image.Handle();
+    const GLenum dst_target = ImageTarget(dst_image.info);
+    const GLenum src_target = ImageTarget(src_image.info);
+    for (const ImageCopy& copy : copies) {
+        const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target);
+        const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target);
+        const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target);
+        glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y,
+                           src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x,
+                           dst_origin.y, dst_origin.z, region.width, region.height, region.depth);
     }
 }
 
-void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
-    MICROPROFILE_SCOPE(OpenGL_Texture_Upload);
-    SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); });
-    for (u32 level = 0; level < params.emulated_levels; ++level) {
-        UploadTextureMipmap(level, staging_buffer);
+bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
+    if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+        return false;
     }
+    return true;
 }
 
-void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
-    glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
-    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
-
-    const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
-    const u8* buffer{staging_buffer.data() + mip_offset};
-    if (is_compressed) {
-        const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
-        switch (params.target) {
-        case SurfaceTarget::Texture2D:
-            glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
-                                          static_cast<GLsizei>(params.GetMipWidth(level)),
-                                          static_cast<GLsizei>(params.GetMipHeight(level)),
-                                          internal_format, image_size, buffer);
-            break;
-        case SurfaceTarget::Texture3D:
-        case SurfaceTarget::Texture2DArray:
-        case SurfaceTarget::TextureCubeArray:
-            glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
-                                          static_cast<GLsizei>(params.GetMipWidth(level)),
-                                          static_cast<GLsizei>(params.GetMipHeight(level)),
-                                          static_cast<GLsizei>(params.GetMipDepth(level)),
-                                          internal_format, image_size, buffer);
-            break;
-        case SurfaceTarget::TextureCubemap: {
-            const std::size_t host_layer_size{params.GetHostLayerSize(level)};
-            for (std::size_t face = 0; face < params.depth; ++face) {
-                glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
-                                              static_cast<GLsizei>(params.GetMipWidth(level)),
-                                              static_cast<GLsizei>(params.GetMipHeight(level)), 1,
-                                              internal_format,
-                                              static_cast<GLsizei>(host_layer_size), buffer);
-                buffer += host_layer_size;
-            }
-            break;
-        }
-        default:
-            UNREACHABLE();
-        }
+void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
+                                           std::span<const ImageCopy> copies) {
+    if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
+        ASSERT(src.info.type == ImageType::e3D);
+        util_shaders.CopyBC4(dst, src, copies);
     } else {
-        switch (params.target) {
-        case SurfaceTarget::Texture1D:
-            glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
-                                buffer);
-            break;
-        case SurfaceTarget::TextureBuffer:
-            ASSERT(level == 0);
-            glNamedBufferSubData(texture_buffer.handle, 0,
-                                 params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
-            break;
-        case SurfaceTarget::Texture1DArray:
-        case SurfaceTarget::Texture2D:
-            glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
-                                params.GetMipHeight(level), format, type, buffer);
-            break;
-        case SurfaceTarget::Texture3D:
-        case SurfaceTarget::Texture2DArray:
-        case SurfaceTarget::TextureCubeArray:
-            glTextureSubImage3D(
-                texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
-                static_cast<GLsizei>(params.GetMipHeight(level)),
-                static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
-            break;
-        case SurfaceTarget::TextureCubemap:
-            for (std::size_t face = 0; face < params.depth; ++face) {
-                glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
-                                    params.GetMipWidth(level), params.GetMipHeight(level), 1,
-                                    format, type, buffer);
-                buffer += params.GetHostLayerSize(level);
-            }
-            break;
-        default:
-            UNREACHABLE();
-        }
+        UNREACHABLE();
     }
 }
 
-void CachedSurface::DecorateSurfaceName() {
-    LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
-}
+void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+                                          const std::array<Offset2D, 2>& dst_region,
+                                          const std::array<Offset2D, 2>& src_region,
+                                          Tegra::Engines::Fermi2D::Filter filter,
+                                          Tegra::Engines::Fermi2D::Operation operation) {
+    state_tracker.NotifyScissor0();
+    state_tracker.NotifyRasterizeEnable();
+    state_tracker.NotifyFramebufferSRGB();
 
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
-    LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
+    ASSERT(dst->BufferBits() == src->BufferBits());
+
+    glEnable(GL_FRAMEBUFFER_SRGB);
+    glDisable(GL_RASTERIZER_DISCARD);
+    glDisablei(GL_SCISSOR_TEST, 0);
+
+    const GLbitfield buffer_bits = dst->BufferBits();
+    const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
+    const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
+    glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y,
+                           src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y,
+                           dst_region[1].x, dst_region[1].y, buffer_bits,
+                           is_linear ? GL_LINEAR : GL_NEAREST);
 }
 
-View CachedSurface::CreateView(const ViewParams& view_key) {
-    return CreateViewInner(view_key, false);
+void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
+                                                size_t buffer_offset,
+                                                std::span<const SwizzleParameters> swizzles) {
+    switch (image.info.type) {
+    case ImageType::e2D:
+        return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
+    case ImageType::e3D:
+        return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
+    case ImageType::Linear:
+        return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
+    default:
+        UNREACHABLE();
+        break;
+    }
 }
 
-View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
-    auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
-    views[view_key] = view;
-    if (!is_proxy)
-        view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
-    return view;
+void TextureCacheRuntime::InsertUploadMemoryBarrier() {
+    glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
 }
 
-CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_,
-                                     bool is_proxy_)
-    : ViewBase{params_}, surface{surface_}, format{surface_.internal_format},
-      target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} {
-    if (!is_proxy_) {
-        main_view = CreateTextureView();
+FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const {
+    switch (type) {
+    case ImageType::e1D:
+        return format_properties[0].at(internal_format);
+    case ImageType::e2D:
+    case ImageType::Linear:
+        return format_properties[1].at(internal_format);
+    case ImageType::e3D:
+        return format_properties[2].at(internal_format);
+    default:
+        UNREACHABLE();
+        return FormatProperties{};
     }
 }
 
-CachedSurfaceView::~CachedSurfaceView() = default;
+TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
+    : storage_flags{storage_flags_}, map_flags{map_flags_} {}
 
-void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
-    ASSERT(params.num_levels == 1);
+TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
 
-    if (params.target == SurfaceTarget::Texture3D) {
-        if (params.num_layers > 1) {
-            ASSERT(params.base_layer == 0);
-            glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
-        } else {
-            glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
-                                   params.base_level, params.base_layer);
-        }
-        return;
+ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
+                                                               bool insert_fence) {
+    const size_t index = RequestBuffer(requested_size);
+    OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
+    return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
+}
+
+size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
+    if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
+        return *index;
     }
 
-    if (params.num_layers > 1) {
-        UNIMPLEMENTED_IF(params.base_layer != 0);
-        glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
-        return;
+    OGLBuffer& buffer = buffers.emplace_back();
+    buffer.Create();
+    glNamedBufferStorage(buffer.handle, requested_size, nullptr,
+                         storage_flags | GL_MAP_PERSISTENT_BIT);
+    maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
+                                                          map_flags | GL_MAP_PERSISTENT_BIT)));
+
+    syncs.emplace_back();
+    sizes.push_back(requested_size);
+
+    ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
+           maps.size() == sizes.size());
+
+    return buffers.size() - 1;
+}
+
+std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
+    size_t smallest_buffer = std::numeric_limits<size_t>::max();
+    std::optional<size_t> found;
+    const size_t num_buffers = sizes.size();
+    for (size_t index = 0; index < num_buffers; ++index) {
+        const size_t buffer_size = sizes[index];
+        if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
+            continue;
+        }
+        if (syncs[index].handle != 0) {
+            GLint status;
+            glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
+            if (status != GL_SIGNALED) {
+                continue;
+            }
+            syncs[index].Release();
+        }
+        smallest_buffer = buffer_size;
+        found = index;
     }
+    return found;
+}
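FindBuffer reuses the smallest already-allocated staging buffer whose fence has signaled, so a mapped range is never handed out while the GPU may still be reading it. The non-blocking poll it relies on, in isolation:

    // Sketch: non-blocking check that a fence has signaled.
    bool IsSignaled(GLsync sync) {
        GLint status = GL_UNSIGNALED;
        glGetSynciv(sync, GL_SYNC_STATUS, 1, nullptr, &status);
        return status == GL_SIGNALED;
    }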
 
-    const GLenum view_target = surface.GetTarget();
-    const GLuint texture = surface.GetTexture();
-    switch (surface.GetSurfaceParams().target) {
-    case SurfaceTarget::Texture1D:
-        glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
-        break;
-    case SurfaceTarget::Texture2D:
-        glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
-        break;
-    case SurfaceTarget::Texture1DArray:
-    case SurfaceTarget::Texture2DArray:
-    case SurfaceTarget::TextureCubemap:
-    case SurfaceTarget::TextureCubeArray:
-        glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
-                                  params.base_layer);
-        break;
-    default:
-        UNIMPLEMENTED();
+Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
+             VAddr cpu_addr_)
+    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
+    if (CanBeAccelerated(runtime, info)) {
+        flags |= ImageFlagBits::AcceleratedUpload;
+    }
+    if (IsConverted(runtime.device, info.format, info.type)) {
+        flags |= ImageFlagBits::Converted;
+        gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+        gl_format = GL_RGBA;
+        gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+    } else {
+        const auto& tuple = GetFormatTuple(info.format);
+        gl_internal_format = tuple.internal_format;
+        gl_format = tuple.format;
+        gl_type = tuple.type;
+    }
+    const GLenum target = ImageTarget(info);
+    const GLsizei width = info.size.width;
+    const GLsizei height = info.size.height;
+    const GLsizei depth = info.size.depth;
+    const int max_host_mip_levels = std::bit_width(info.size.width);
+    const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
+    const GLsizei num_layers = info.resources.layers;
+    const GLsizei num_samples = info.num_samples;
+
+    GLuint handle = 0;
+    if (target != GL_TEXTURE_BUFFER) {
+        texture.Create(target);
+        handle = texture.handle;
+    }
+    switch (target) {
+    case GL_TEXTURE_1D_ARRAY:
+        glTextureStorage2D(handle, num_levels, gl_internal_format, width, num_layers);
+        break;
+    case GL_TEXTURE_2D_ARRAY:
+        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, num_layers);
+        break;
+    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
+        // TODO: Where should 'fixedsamplelocations' come from?
+        const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
+        glTextureStorage3DMultisample(handle, num_samples, gl_internal_format, width >> samples_x,
+                                      height >> samples_y, num_layers, GL_FALSE);
+        break;
+    }
+    case GL_TEXTURE_RECTANGLE:
+        glTextureStorage2D(handle, num_levels, gl_internal_format, width, height);
+        break;
+    case GL_TEXTURE_3D:
+        glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
+        break;
+    case GL_TEXTURE_BUFFER:
+        buffer.Create();
+        glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
+        break;
+    default:
+        UNREACHABLE_MSG("Invalid target=0x{:x}", target);
+        break;
+    }
+    if (runtime.device.HasDebuggingToolAttached()) {
+        const std::string name = VideoCommon::Name(*this);
+        glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
+                      static_cast<GLsizei>(name.size()), name.data());
     }
 }
 
-GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
-                                     SwizzleSource z_source, SwizzleSource w_source) {
-    if (GetSurfaceParams().IsBuffer()) {
-        return GetTexture();
-    }
-    const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
-    if (current_swizzle == new_swizzle) {
-        return current_view;
-    }
-    current_swizzle = new_swizzle;
-
-    const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
-    OGLTextureView& view = entry->second;
-    if (!is_cache_miss) {
-        current_view = view.handle;
-        return view.handle;
-    }
-    view = CreateTextureView();
-    current_view = view.handle;
-
-    std::array swizzle{x_source, y_source, z_source, w_source};
-
-    switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) {
-    case PixelFormat::D24_UNORM_S8_UINT:
-    case PixelFormat::D32_FLOAT_S8_UINT:
-    case PixelFormat::S8_UINT_D24_UNORM:
-        UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
-        glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
-                            GetComponent(pixel_format, x_source == SwizzleSource::R));
-
-        // Make sure we sample the first component
-        std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
-            return value == SwizzleSource::G ? SwizzleSource::R : value;
-        });
-        [[fallthrough]];
-    default: {
-        const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
-                                       GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
-        glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
-        break;
-    }
-    }
-    return view.handle;
+void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+                         std::span<const VideoCommon::BufferImageCopy> copies) {
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
+    glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
+
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+    u32 current_row_length = std::numeric_limits<u32>::max();
+    u32 current_image_height = std::numeric_limits<u32>::max();
+
+    for (const VideoCommon::BufferImageCopy& copy : copies) {
+        if (current_row_length != copy.buffer_row_length) {
+            current_row_length = copy.buffer_row_length;
+            glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
+        }
+        if (current_image_height != copy.buffer_image_height) {
+            current_image_height = copy.buffer_image_height;
+            glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
+        }
+        CopyBufferToImage(copy, buffer_offset);
+    }
 }
 
-OGLTextureView CachedSurfaceView::CreateTextureView() const {
-    OGLTextureView texture_view;
-    texture_view.Create();
-
-    if (target == GL_TEXTURE_3D) {
-        glTextureView(texture_view.handle, target, surface.texture.handle, format,
-                      params.base_level, params.num_levels, 0, 1);
-    } else {
-        glTextureView(texture_view.handle, target, surface.texture.handle, format,
-                      params.base_level, params.num_levels, params.base_layer, params.num_layers);
-    }
-    ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
-
-    return texture_view;
+void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+                         std::span<const VideoCommon::BufferCopy> copies) {
+    for (const VideoCommon::BufferCopy& copy : copies) {
+        glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
+                                 copy.dst_offset, copy.size);
+    }
 }
 
-TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
-                                       Tegra::Engines::Maxwell3D& maxwell3d_,
-                                       Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                                       StateTracker& state_tracker_)
-    : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()},
-      state_tracker{state_tracker_} {
-    src_framebuffer.Create();
-    dst_framebuffer.Create();
-}
-
-TextureCacheOpenGL::~TextureCacheOpenGL() = default;
-
-Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
-    return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
-}
-
-void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
-                                   const VideoCommon::CopyParams& copy_params) {
-    const auto& src_params = src_surface->GetSurfaceParams();
-    const auto& dst_params = dst_surface->GetSurfaceParams();
-    if (src_params.type != dst_params.type) {
-        // A fallback is needed
-        return;
+void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+                           std::span<const VideoCommon::BufferImageCopy> copies) {
+    glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
+
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
+    glPixelStorei(GL_PACK_ALIGNMENT, 1);
+
+    u32 current_row_length = std::numeric_limits<u32>::max();
+    u32 current_image_height = std::numeric_limits<u32>::max();
+
+    for (const VideoCommon::BufferImageCopy& copy : copies) {
+        if (current_row_length != copy.buffer_row_length) {
+            current_row_length = copy.buffer_row_length;
+            glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
+        }
+        if (current_image_height != copy.buffer_image_height) {
+            current_image_height = copy.buffer_image_height;
+            glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
+        }
+        CopyImageToBuffer(copy, buffer_offset);
     }
-    const auto src_handle = src_surface->GetTexture();
-    const auto src_target = src_surface->GetTarget();
-    const auto dst_handle = dst_surface->GetTexture();
-    const auto dst_target = dst_surface->GetTarget();
-    glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
-                       copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
-                       copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
-                       copy_params.dest_z, copy_params.width, copy_params.height,
-                       copy_params.depth);
 }
 
-void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
-                                   const Tegra::Engines::Fermi2D::Config& copy_config) {
-    const auto& src_params{src_view->GetSurfaceParams()};
-    const auto& dst_params{dst_view->GetSurfaceParams()};
-    UNIMPLEMENTED_IF(src_params.depth != 1);
-    UNIMPLEMENTED_IF(dst_params.depth != 1);
-
-    state_tracker.NotifyScissor0();
-    state_tracker.NotifyFramebuffer();
-    state_tracker.NotifyRasterizeEnable();
-    state_tracker.NotifyFramebufferSRGB();
+void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+    // Compressed formats don't have a pixel format or type
+    const bool is_compressed = gl_format == GL_NONE;
+    const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset);
 
-    if (dst_params.srgb_conversion) {
-        glEnable(GL_FRAMEBUFFER_SRGB);
-    } else {
-        glDisable(GL_FRAMEBUFFER_SRGB);
+    switch (info.type) {
+    case ImageType::e1D:
+        if (is_compressed) {
+            glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+                                          copy.image_offset.x, copy.image_subresource.base_layer,
+                                          copy.image_extent.width,
+                                          copy.image_subresource.num_layers, gl_internal_format,
+                                          static_cast<GLsizei>(copy.buffer_size), offset);
+        } else {
+            glTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
+                                copy.image_offset.x, copy.image_subresource.base_layer,
+                                copy.image_extent.width, copy.image_subresource.num_layers,
+                                gl_format, gl_type, offset);
+        }
+        break;
+    case ImageType::e2D:
+    case ImageType::Linear:
+        if (is_compressed) {
+            glCompressedTextureSubImage3D(
+                texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+                copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width,
+                copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format,
+                static_cast<GLsizei>(copy.buffer_size), offset);
+        } else {
+            glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+                                copy.image_offset.x, copy.image_offset.y,
+                                copy.image_subresource.base_layer, copy.image_extent.width,
+                                copy.image_extent.height, copy.image_subresource.num_layers,
+                                gl_format, gl_type, offset);
+        }
+        break;
+    case ImageType::e3D:
+        if (is_compressed) {
+            glCompressedTextureSubImage3D(
+                texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
+                copy.image_offset.y, copy.image_offset.z, copy.image_extent.width,
+                copy.image_extent.height, copy.image_extent.depth, gl_internal_format,
+                static_cast<GLsizei>(copy.buffer_size), offset);
+        } else {
+            glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
+                                copy.image_offset.x, copy.image_offset.y, copy.image_offset.z,
+                                copy.image_extent.width, copy.image_extent.height,
+                                copy.image_extent.depth, gl_format, gl_type, offset);
+        }
+        break;
+    default:
+        UNREACHABLE();
     }
-    glDisable(GL_RASTERIZER_DISCARD);
-    glDisablei(GL_SCISSOR_TEST, 0);
-
-    glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
-
-    GLenum buffers = 0;
-    if (src_params.type == SurfaceType::ColorTexture) {
-        src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-
-        dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-
-        buffers = GL_COLOR_BUFFER_BIT;
-    } else if (src_params.type == SurfaceType::Depth) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
-        buffers = GL_DEPTH_BUFFER_BIT;
-    } else if (src_params.type == SurfaceType::DepthStencil) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
-
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+}
 
-        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
+    const GLint x_offset = copy.image_offset.x;
+    const GLsizei width = copy.image_extent.width;
+
+    const GLint level = copy.image_subresource.base_level;
+    const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size);
+    void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset);
+
+    GLint y_offset = 0;
+    GLint z_offset = 0;
+    GLsizei height = 1;
+    GLsizei depth = 1;
+
+    switch (info.type) {
+    case ImageType::e1D:
+        y_offset = copy.image_subresource.base_layer;
+        height = copy.image_subresource.num_layers;
+        break;
+    case ImageType::e2D:
+    case ImageType::Linear:
+        y_offset = copy.image_offset.y;
+        z_offset = copy.image_subresource.base_layer;
+        height = copy.image_extent.height;
+        depth = copy.image_subresource.num_layers;
+        break;
+    case ImageType::e3D:
+        y_offset = copy.image_offset.y;
+        z_offset = copy.image_offset.z;
+        height = copy.image_extent.height;
+        depth = copy.image_extent.depth;
+        break;
+    default:
+        UNREACHABLE();
+    }
+    // Compressed formats don't have a pixel format or type
+    const bool is_compressed = gl_format == GL_NONE;
+    if (is_compressed) {
+        glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width,
+                                       height, depth, buffer_size, offset);
+    } else {
+        glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height,
+                             depth, gl_format, gl_type, buffer_size, offset);
     }
-
-    const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
-    const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
-    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
-
-    glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
-                      static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
-                      static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
-                      static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
-                      buffers,
-                      is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
 }
 
-void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
-    MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
-    const auto& src_params = src_surface->GetSurfaceParams();
-    const auto& dst_params = dst_surface->GetSurfaceParams();
-    UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
-
-    const auto source_format = GetFormatTuple(src_params.pixel_format);
-    const auto dest_format = GetFormatTuple(dst_params.pixel_format);
-
-    const std::size_t source_size = src_surface->GetHostSizeInBytes();
-    const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
-
-    const std::size_t buffer_size = std::max(source_size, dest_size);
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
+                     ImageId image_id_, Image& image)
+    : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
+    const Device& device = runtime.device;
+    if (True(image.flags & ImageFlagBits::Converted)) {
+        internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
+    } else {
+        internal_format = GetFormatTuple(format).internal_format;
+    }
+    VideoCommon::SubresourceRange flatten_range = info.range;
+    std::array<GLuint, 2> handles;
+    stored_views.reserve(2);
 
-    GLuint copy_pbo_handle = FetchPBO(buffer_size);
+    switch (info.type) {
+    case ImageViewType::e1DArray:
+        flatten_range.extent.layers = 1;
+        [[fallthrough]];
+    case ImageViewType::e1D:
+        glGenTextures(2, handles.data());
+        SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
+        SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+        break;
+    case ImageViewType::e2DArray:
+        flatten_range.extent.layers = 1;
+        [[fallthrough]];
+    case ImageViewType::e2D:
+        if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
+            // 2D and 2D array views on a 3D textures are used exclusively for render targets
+            ASSERT(info.range.extent.levels == 1);
+            const VideoCommon::SubresourceRange slice_range{
+                .base = {.level = info.range.base.level, .layer = 0},
+                .extent = {.levels = 1, .layers = 1},
+            };
+            glGenTextures(1, handles.data());
+            SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
+            break;
+        }
+        glGenTextures(2, handles.data());
+        SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
+        SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
+        break;
+    case ImageViewType::e3D:
+        glGenTextures(1, handles.data());
+        SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+        break;
+    case ImageViewType::CubeArray:
+        flatten_range.extent.layers = 6;
+        [[fallthrough]];
+    case ImageViewType::Cube:
+        glGenTextures(2, handles.data());
+        SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
+        SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+        break;
+    case ImageViewType::Rect:
+        glGenTextures(1, handles.data());
+        SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+        break;
+    case ImageViewType::Buffer:
+        glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
+        SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+        break;
+    }
+    default_handle = Handle(info.type);
+}
 
-    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
+    : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
+
+void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
+                          GLuint handle, const VideoCommon::ImageViewInfo& info,
+                          VideoCommon::SubresourceRange view_range) {
+    if (info.type == ImageViewType::Buffer) {
+        // TODO: Take offset from buffer cache
+        glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
+                             image.guest_size_bytes);
+    } else {
+        const GLuint parent = image.texture.handle;
+        const GLenum target = ImageTarget(view_type, image.info.num_samples);
+        glTextureView(handle, target, parent, internal_format, view_range.base.level,
+                      view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
+        if (!info.IsRenderTarget()) {
+            ApplySwizzle(handle, format, info.Swizzle());
+        }
+    }
+    if (device.HasDebuggingToolAttached()) {
+        const std::string name = VideoCommon::Name(*this, view_type);
+        glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+    }
+    stored_views.emplace_back().handle = handle;
+    views[static_cast<size_t>(view_type)] = handle;
+}
+
+Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
+    const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE;
+    const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func);
+    const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None);
+    const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter);
+    const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter);
+    const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE;
+
+    UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
+    UNIMPLEMENTED_IF(config.float_coord_normalization != 0);
+
+    sampler.Create();
+    const GLuint handle = sampler.handle;
+    glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
+    glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
+    glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
+    glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
+    glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
+    glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
+    glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
+    glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
+    glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
+    glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
+    glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
+
+    if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
+        glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
+    } else {
+        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
+    }
+    if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
+        glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
+    } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
+        LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
+    }
+    if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
+        glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
+    } else if (seamless == GL_FALSE) {
+        // We default to false because it's more common
+        LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
+    }
+}
651 1000
652 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); 1001Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1002 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1003 // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
1004 // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
1005 // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
1006 // mismatching size, this is why core framebuffers are preferred.
1007 GLuint handle;
1008 glGenFramebuffers(1, &handle);
1009 glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
1010
1011 GLsizei num_buffers = 0;
1012 std::array<GLenum, NUM_RT> gl_draw_buffers;
1013 gl_draw_buffers.fill(GL_NONE);
1014
1015 for (size_t index = 0; index < color_buffers.size(); ++index) {
1016 const ImageView* const image_view = color_buffers[index];
1017 if (!image_view) {
1018 continue;
1019 }
1020 buffer_bits |= GL_COLOR_BUFFER_BIT;
1021 gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index];
1022 num_buffers = static_cast<GLsizei>(index + 1);
653 1023
654 if (src_surface->IsCompressed()) { 1024 const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index);
655 glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), 1025 AttachTexture(handle, attachment, image_view);
656 nullptr);
657 } else {
658 glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
659 static_cast<GLsizei>(source_size), nullptr);
660 } 1026 }
661 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
662 1027
663 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); 1028 if (const ImageView* const image_view = depth_buffer; image_view) {
1029 if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
1030 buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1031 } else {
1032 buffer_bits |= GL_DEPTH_BUFFER_BIT;
1033 }
1034 const GLenum attachment = AttachmentType(image_view->format);
1035 AttachTexture(handle, attachment, image_view);
1036 }
664 1037
665 const GLsizei width = static_cast<GLsizei>(dst_params.width); 1038 if (num_buffers > 1) {
666 const GLsizei height = static_cast<GLsizei>(dst_params.height); 1039 glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data());
667 const GLsizei depth = static_cast<GLsizei>(dst_params.depth); 1040 } else if (num_buffers > 0) {
668 if (dst_surface->IsCompressed()) { 1041 glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]);
669 LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
670 UNREACHABLE();
671 } else { 1042 } else {
672 switch (dst_params.target) { 1043 glNamedFramebufferDrawBuffer(handle, GL_NONE);
673 case SurfaceTarget::Texture1D:
674 glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
675 dest_format.type, nullptr);
676 break;
677 case SurfaceTarget::Texture2D:
678 glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
679 dest_format.format, dest_format.type, nullptr);
680 break;
681 case SurfaceTarget::Texture3D:
682 case SurfaceTarget::Texture2DArray:
683 case SurfaceTarget::TextureCubeArray:
684 glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
685 dest_format.format, dest_format.type, nullptr);
686 break;
687 case SurfaceTarget::TextureCubemap:
688 glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
689 dest_format.format, dest_format.type, nullptr);
690 break;
691 default:
692 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target);
693 UNREACHABLE();
694 }
695 } 1044 }
696 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
697 1045
698 glTextureBarrier(); 1046 glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width);
699} 1047 glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height);
1048 // TODO
1049 // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...);
1050 // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...);
1051 // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...);
700 1052
701GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { 1053 if (runtime.device.HasDebuggingToolAttached()) {
702 ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); 1054 const std::string name = VideoCommon::Name(key);
703 const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); 1055 glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
704 OGLBuffer& cp = copy_pbo_cache[l2];
705 if (cp.handle == 0) {
706 const std::size_t ceil_size = 1ULL << l2;
707 cp.Create();
708 cp.MakeStreamCopy(ceil_size);
709 } 1056 }
710 return cp.handle; 1057 framebuffer.handle = handle;
711} 1058}
712 1059
713} // namespace OpenGL 1060} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 72b284fab..15b7c3676 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -4,157 +4,251 @@
 
 #pragma once
 
-#include <array>
-#include <functional>
 #include <memory>
-#include <unordered_map>
-#include <utility>
-#include <vector>
+#include <span>
 
 #include <glad/glad.h>
 
-#include "common/common_types.h"
-#include "video_core/engines/shader_bytecode.h"
-#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/util_shaders.h"
 #include "video_core/texture_cache/texture_cache.h"
 
 namespace OpenGL {
 
-using VideoCommon::SurfaceParams;
-using VideoCommon::ViewParams;
-
-class CachedSurfaceView;
-class CachedSurface;
-class TextureCacheOpenGL;
+class Device;
+class ProgramManager;
 class StateTracker;
 
-using Surface = std::shared_ptr<CachedSurface>;
-using View = std::shared_ptr<CachedSurfaceView>;
-using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
+class Framebuffer;
+class Image;
+class ImageView;
+class Sampler;
 
-class CachedSurface final : public VideoCommon::SurfaceBase<View> {
-    friend CachedSurfaceView;
+using VideoCommon::ImageId;
+using VideoCommon::ImageViewId;
+using VideoCommon::ImageViewType;
+using VideoCommon::NUM_RT;
+using VideoCommon::Offset2D;
+using VideoCommon::RenderTargets;
 
+class ImageBufferMap {
 public:
-    explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_,
-                           bool is_astc_supported_);
-    ~CachedSurface();
-
-    void UploadTexture(const std::vector<u8>& staging_buffer) override;
-    void DownloadTexture(std::vector<u8>& staging_buffer) override;
+    explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
+    ~ImageBufferMap();
 
-    GLenum GetTarget() const {
-        return target;
+    GLuint Handle() const noexcept {
+        return handle;
     }
 
-    GLuint GetTexture() const {
-        return texture.handle;
+    std::span<u8> Span() const noexcept {
+        return span;
     }
 
-    bool IsCompressed() const {
-        return is_compressed;
+private:
+    std::span<u8> span;
+    OGLSync* sync;
+    GLuint handle;
+};
+
+struct FormatProperties {
+    GLenum compatibility_class;
+    bool compatibility_by_size;
+    bool is_compressed;
+};
+
+class TextureCacheRuntime {
+    friend Framebuffer;
+    friend Image;
+    friend ImageView;
+    friend Sampler;
+
+public:
+    explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
+                                 StateTracker& state_tracker);
+    ~TextureCacheRuntime();
+
+    void Finish();
+
+    ImageBufferMap MapUploadBuffer(size_t size);
+
+    ImageBufferMap MapDownloadBuffer(size_t size);
+
+    void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
+    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
+        UNIMPLEMENTED();
     }
 
-protected:
-    void DecorateSurfaceName() override;
+    bool CanImageBeCopied(const Image& dst, const Image& src);
+
+    void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
+
+    void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
+                         const std::array<Offset2D, 2>& dst_region,
+                         const std::array<Offset2D, 2>& src_region,
+                         Tegra::Engines::Fermi2D::Filter filter,
+                         Tegra::Engines::Fermi2D::Operation operation);
+
+    void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+                               std::span<const VideoCommon::SwizzleParameters> swizzles);
 
-    View CreateView(const ViewParams& view_key) override;
-    View CreateViewInner(const ViewParams& view_key, bool is_proxy);
+    void InsertUploadMemoryBarrier();
+
+    FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
+
+    bool HasBrokenTextureViewFormats() const noexcept {
+        return has_broken_texture_view_formats;
+    }
 
 private:
-    void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
+    struct StagingBuffers {
+        explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
+        ~StagingBuffers();
 
-    GLenum internal_format{};
-    GLenum format{};
-    GLenum type{};
-    bool is_compressed{};
-    GLenum target{};
-    u32 view_count{};
+        ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
 
-    OGLTexture texture;
-    OGLBuffer texture_buffer;
+        size_t RequestBuffer(size_t requested_size);
+
+        std::optional<size_t> FindBuffer(size_t requested_size);
+
+        std::vector<OGLSync> syncs;
+        std::vector<OGLBuffer> buffers;
+        std::vector<u8*> maps;
+        std::vector<size_t> sizes;
+        GLenum storage_flags;
+        GLenum map_flags;
+    };
+
+    const Device& device;
+    StateTracker& state_tracker;
+    UtilShaders util_shaders;
+
+    std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
+    bool has_broken_texture_view_formats = false;
+
+    StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
+    StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
+
+    OGLTexture null_image_1d_array;
+    OGLTexture null_image_cube_array;
+    OGLTexture null_image_3d;
+    OGLTexture null_image_rect;
+    OGLTextureView null_image_view_1d;
+    OGLTextureView null_image_view_2d;
+    OGLTextureView null_image_view_2d_array;
+    OGLTextureView null_image_view_cube;
+
+    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
 };
 
-class CachedSurfaceView final : public VideoCommon::ViewBase {
+class Image : public VideoCommon::ImageBase {
+    friend ImageView;
+
 public:
-    explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_);
-    ~CachedSurfaceView();
+    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
+                   VAddr cpu_addr);
 
-    /// @brief Attaches this texture view to the currently bound fb_target framebuffer
-    /// @param attachment Attachment to bind textures to
-    /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
-    void Attach(GLenum attachment, GLenum fb_target) const;
+    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+                      std::span<const VideoCommon::BufferImageCopy> copies);
 
-    GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
-                      Tegra::Texture::SwizzleSource y_source,
-                      Tegra::Texture::SwizzleSource z_source,
-                      Tegra::Texture::SwizzleSource w_source);
+    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+                      std::span<const VideoCommon::BufferCopy> copies);
 
-    void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
+    void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+                        std::span<const VideoCommon::BufferImageCopy> copies);
 
-    void MarkAsModified(u64 tick) {
-        surface.MarkAsModified(true, tick);
+    GLuint Handle() const noexcept {
+        return texture.handle;
     }
 
-    GLuint GetTexture() const {
-        if (is_proxy) {
-            return surface.GetTexture();
-        }
-        return main_view.handle;
+private:
+    void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+    void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
+
+    OGLTexture texture;
+    OGLTextureView store_view;
+    OGLBuffer buffer;
+    GLenum gl_internal_format = GL_NONE;
+    GLenum gl_format = GL_NONE;
+    GLenum gl_type = GL_NONE;
+};
+
+class ImageView : public VideoCommon::ImageViewBase {
+    friend Image;
+
+public:
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
+
+    [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
+        return views[static_cast<size_t>(query_type)];
     }
 
-    GLenum GetFormat() const {
-        return format;
+    [[nodiscard]] GLuint DefaultHandle() const noexcept {
+        return default_handle;
     }
 
-    const SurfaceParams& GetSurfaceParams() const {
-        return surface.GetSurfaceParams();
+    [[nodiscard]] GLenum Format() const noexcept {
+        return internal_format;
     }
 
 private:
-    OGLTextureView CreateTextureView() const;
+    void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
+                   const VideoCommon::ImageViewInfo& info,
+                   VideoCommon::SubresourceRange view_range);
+
+    std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+    std::vector<OGLTextureView> stored_views;
+    GLuint default_handle = 0;
+    GLenum internal_format = GL_NONE;
+};
+
+class ImageAlloc : public VideoCommon::ImageAllocBase {};
 
-    CachedSurface& surface;
-    const GLenum format;
-    const GLenum target;
-    const bool is_proxy;
+class Sampler {
+public:
+    explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
 
-    std::unordered_map<u32, OGLTextureView> view_cache;
-    OGLTextureView main_view;
+    GLuint Handle() const noexcept {
+        return sampler.handle;
+    }
 
-    // Use an invalid default so it always fails the comparison test
-    u32 current_swizzle = 0xffffffff;
-    GLuint current_view = 0;
+private:
+    OGLSampler sampler;
 };
 
-class TextureCacheOpenGL final : public TextureCacheBase {
+class Framebuffer {
 public:
-    explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
-                                Tegra::Engines::Maxwell3D& maxwell3d_,
-                                Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                                StateTracker& state_tracker);
-    ~TextureCacheOpenGL();
-
-protected:
-    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
-
-    void ImageCopy(Surface& src_surface, Surface& dst_surface,
-                   const VideoCommon::CopyParams& copy_params) override;
+    explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
+                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
 
-    void ImageBlit(View& src_view, View& dst_view,
-                   const Tegra::Engines::Fermi2D::Config& copy_config) override;
+    [[nodiscard]] GLuint Handle() const noexcept {
+        return framebuffer.handle;
+    }
 
-    void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
+    [[nodiscard]] GLbitfield BufferBits() const noexcept {
+        return buffer_bits;
+    }
 
 private:
-    GLuint FetchPBO(std::size_t buffer_size);
-
-    StateTracker& state_tracker;
+    OGLFramebuffer framebuffer;
+    GLbitfield buffer_bits = GL_NONE;
+};
 
-    OGLFramebuffer src_framebuffer;
-    OGLFramebuffer dst_framebuffer;
-    std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
+struct TextureCacheParams {
+    static constexpr bool ENABLE_VALIDATION = true;
+    static constexpr bool FRAMEBUFFER_BLITS = true;
+    static constexpr bool HAS_EMULATED_COPIES = true;
+
+    using Runtime = OpenGL::TextureCacheRuntime;
+    using Image = OpenGL::Image;
+    using ImageAlloc = OpenGL::ImageAlloc;
+    using ImageView = OpenGL::ImageView;
+    using Sampler = OpenGL::Sampler;
+    using Framebuffer = OpenGL::Framebuffer;
 };
 
+using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
+
 } // namespace OpenGL
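// The TextureCacheParams struct above replaces the old virtual CreateSurface/
// CreateView hooks with compile-time policy: the shared template names every
// backend type through one traits struct. A reduced, self-contained sketch of
// the pattern (illustrative types only, not the real VideoCommon interfaces):
#include <vector>

template <class P>
class GenericCache {
public:
    explicit GenericCache(typename P::Runtime& runtime_) : runtime{runtime_} {}

    // The cache constructs backend images directly; no virtual dispatch.
    typename P::Image& CreateImage() {
        return images.emplace_back(runtime);
    }

private:
    typename P::Runtime& runtime;
    std::vector<typename P::Image> images;
};

struct DummyRuntime {};
struct DummyImage {
    explicit DummyImage(DummyRuntime&) {}
};
struct DummyParams {
    using Runtime = DummyRuntime;
    using Image = DummyImage;
};
// Mirrors: using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
using DummyCache = GenericCache<DummyParams>;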
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index dd4ee3361..cbccfdeb4 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
     return GL_FILL;
 }
 
+inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
+    switch (filter) {
+    case Tegra::Texture::SamplerReduction::WeightedAverage:
+        return GL_WEIGHTED_AVERAGE_ARB;
+    case Tegra::Texture::SamplerReduction::Min:
+        return GL_MIN;
+    case Tegra::Texture::SamplerReduction::Max:
+        return GL_MAX;
+    }
+    UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
+    return GL_WEIGHTED_AVERAGE_ARB;
+}
+
 inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
     // Enumeration order matches register order. We can convert it arithmetically.
     return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
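// The arithmetic conversion above works because GL_NV_viewport_swizzle
// allocates its eight enums contiguously, in the same order as the Maxwell
// register field. A quick check of that assumption with the published values:
static_assert(/* GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV */ 0x9350 + 7 ==
              /* GL_VIEWPORT_SWIZZLE_NEGATIVE_W_NV */ 0x9357);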
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index cbfaaa99c..dd77a543c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -23,10 +23,10 @@
23#include "core/telemetry_session.h" 23#include "core/telemetry_session.h"
24#include "video_core/host_shaders/opengl_present_frag.h" 24#include "video_core/host_shaders/opengl_present_frag.h"
25#include "video_core/host_shaders/opengl_present_vert.h" 25#include "video_core/host_shaders/opengl_present_vert.h"
26#include "video_core/morton.h"
27#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
28#include "video_core/renderer_opengl/gl_shader_manager.h" 27#include "video_core/renderer_opengl/gl_shader_manager.h"
29#include "video_core/renderer_opengl/renderer_opengl.h" 28#include "video_core/renderer_opengl/renderer_opengl.h"
29#include "video_core/textures/decoders.h"
30 30
31namespace OpenGL { 31namespace OpenGL {
32 32
@@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     if (!framebuffer) {
         return;
     }
-
     PrepareRendertarget(framebuffer);
     RenderScreenshot();
 
-    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+    state_tracker.BindFramebuffer(0);
     DrawScreen(emu_window.GetFramebufferLayout());
 
     ++m_current_frame;
@@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
     // Reset the screen info's display texture to its own permanent texture
     screen_info.display_texture = screen_info.texture.resource.handle;
 
-    const auto pixel_format{
-        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
-    const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
-    const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
-    u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
-    rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
-
     // TODO(Rodrigo): Read this from HLE
     constexpr u32 block_height_log2 = 4;
-    VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
-                             framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
-                             gl_framebuffer_data.data(), host_ptr);
-
+    const auto pixel_format{
+        VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+    const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
+    const u64 size_in_bytes{Tegra::Texture::CalculateSize(
+        true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
+    const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
+    const std::span<const u8> input_data(host_ptr, size_in_bytes);
+    Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
+                                     framebuffer.width, framebuffer.height, 1, block_height_log2,
+                                     0);
+
+    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
 
     // Update existing texture
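// Tegra::Texture::CalculateSize accounts for block-linear padding: texels sit
// in 512-byte GOBs (64 bytes wide, 8 rows tall) stacked 1 << block_height_log2
// high, so the swizzled size can exceed stride * height * bytes_per_pixel.
// A worked sketch of the rounding; the GOB dimensions here are an assumption
// from the Tegra documentation, not taken from this patch:
constexpr unsigned AlignUp(unsigned value, unsigned align) {
    return (value + align - 1) / align * align;
}
static_assert(AlignUp(720, 8 << 4) == 768); // 720 rows pad to 6 blocks of 128
static_assert(AlignUp(64, 64) == 64);       // a 64-byte pitch needs no padding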
@@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
     glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
     glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
 
+    // Generate presentation sampler
+    present_sampler.Create();
+    glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+
     // Generate VBO handle for drawing
     vertex_buffer.Create();
 
@@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
     // Clear screen to black
     LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
 
+    // Enable seamless cubemaps when per texture parameters are not available
+    if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
+        glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
+    }
+
     // Enable unified vertex attributes and query vertex buffer address when the driver supports it
     if (device.HasVertexBufferUnifiedMemory()) {
         glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
@@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
 
     const auto pixel_format{
         VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
-    const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+    const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
     gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
 
     GLint internal_format;
@@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
         internal_format = GL_RGBA8;
         texture.gl_format = GL_RGBA;
         texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
-        UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
-                          static_cast<u32>(framebuffer.pixel_format));
+        // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+        //                   static_cast<u32>(framebuffer.pixel_format));
     }
 
     texture.resource.Release();
@@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     state_tracker.NotifyPolygonModes();
     state_tracker.NotifyViewport0();
     state_tracker.NotifyScissor0();
-    state_tracker.NotifyColorMask0();
+    state_tracker.NotifyColorMask(0);
     state_tracker.NotifyBlend0();
     state_tracker.NotifyFramebuffer();
     state_tracker.NotifyFrontFace();
@@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     }
 
     glBindTextureUnit(0, screen_info.display_texture);
-    glBindSampler(0, 0);
+    glBindSampler(0, present_sampler.handle);
 
     glClear(GL_COLOR_BUFFER_BIT);
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
 
     DrawScreen(layout);
 
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
     glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
                  renderer_settings.screenshot_bits);
 
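// Unbinding the pack buffer and resetting GL_PACK_ROW_LENGTH before the
// glReadPixels call matters: with a PBO bound, the pixels argument becomes a
// buffer offset instead of a client pointer, and a stale row length packs
// rows at the wrong stride. A minimal sketch of the tight read-back pattern:
void ReadBackTight(GLsizei width, GLsizei height, void* pixels) {
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); // destination is client memory
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);  // rows are exactly `width` texels
    glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, pixels);
}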
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 376f88766..44e109794 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -102,6 +102,7 @@ private:
     StateTracker state_tracker{gpu};
 
     // OpenGL object IDs
+    OGLSampler present_sampler;
     OGLBuffer vertex_buffer;
     OGLProgram vertex_program;
     OGLProgram fragment_program;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
new file mode 100644
index 000000000..eb849cbf2
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -0,0 +1,224 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <bit>
6#include <span>
7#include <string_view>
8
9#include <glad/glad.h>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "common/div_ceil.h"
14#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
15#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/pitch_unswizzle_comp.h"
18#include "video_core/renderer_opengl/gl_resource_manager.h"
19#include "video_core/renderer_opengl/gl_shader_manager.h"
20#include "video_core/renderer_opengl/gl_texture_cache.h"
21#include "video_core/renderer_opengl/util_shaders.h"
22#include "video_core/surface.h"
23#include "video_core/texture_cache/accelerated_swizzle.h"
24#include "video_core/texture_cache/types.h"
25#include "video_core/texture_cache/util.h"
26#include "video_core/textures/decoders.h"
27
28namespace OpenGL {
29
30using namespace HostShaders;
31
32using VideoCommon::Extent3D;
33using VideoCommon::ImageCopy;
34using VideoCommon::ImageType;
35using VideoCommon::SwizzleParameters;
36using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
37using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
38using VideoCore::Surface::BytesPerBlock;
39
40namespace {
41
42OGLProgram MakeProgram(std::string_view source) {
43 OGLShader shader;
44 shader.Create(source, GL_COMPUTE_SHADER);
45
46 OGLProgram program;
47 program.Create(true, false, shader.handle);
48 return program;
49}
50
51} // Anonymous namespace
52
53UtilShaders::UtilShaders(ProgramManager& program_manager_)
54 : program_manager{program_manager_},
55 block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
56 block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
57 pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
58 copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
59 const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
60 swizzle_table_buffer.Create();
61 glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
62}
63
64UtilShaders::~UtilShaders() = default;
65
66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
67 std::span<const SwizzleParameters> swizzles) {
68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
70 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
72
73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
74 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
76
77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
78 for (const SwizzleParameters& swizzle : swizzles) {
79 const Extent3D num_tiles = swizzle.num_tiles;
80 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
81
82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
84
85 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
86 glUniform3uiv(0, 1, params.origin.data());
87 glUniform3iv(1, 1, params.destination.data());
88 glUniform1ui(2, params.bytes_per_block_log2);
89 glUniform1ui(3, params.layer_stride);
90 glUniform1ui(4, params.block_size);
91 glUniform1ui(5, params.x_shift);
92 glUniform1ui(6, params.block_height);
93 glUniform1ui(7, params.block_height_mask);
94 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
95 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
96 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
97 GL_WRITE_ONLY, store_format);
98 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
99 }
100 program_manager.RestoreGuestCompute();
101}
102
103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
104 std::span<const SwizzleParameters> swizzles) {
105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
106
107 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
108 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
110
111 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
114
115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
116 for (const SwizzleParameters& swizzle : swizzles) {
117 const Extent3D num_tiles = swizzle.num_tiles;
118 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
119
120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
122 const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
123
124 const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
125 glUniform3uiv(0, 1, params.origin.data());
126 glUniform3iv(1, 1, params.destination.data());
127 glUniform1ui(2, params.bytes_per_block_log2);
128 glUniform1ui(3, params.slice_size);
129 glUniform1ui(4, params.block_size);
130 glUniform1ui(5, params.x_shift);
131 glUniform1ui(6, params.block_height);
132 glUniform1ui(7, params.block_height_mask);
133 glUniform1ui(8, params.block_depth);
134 glUniform1ui(9, params.block_depth_mask);
135 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
136 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
137 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
138 GL_WRITE_ONLY, store_format);
139 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
140 }
141 program_manager.RestoreGuestCompute();
142}
143
144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
145 std::span<const SwizzleParameters> swizzles) {
146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
147 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
148 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
149 static constexpr GLuint LOC_ORIGIN = 0;
150 static constexpr GLuint LOC_DESTINATION = 1;
151 static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
152 static constexpr GLuint LOC_PITCH = 3;
153
154 const u32 bytes_per_block = BytesPerBlock(image.info.format);
155 const GLenum format = StoreFormat(bytes_per_block);
156 const u32 pitch = image.info.pitch;
157
158 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
159 "Non-power of two images are not implemented");
160
161 program_manager.BindHostCompute(pitch_unswizzle_program.handle);
162 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
163 glUniform2ui(LOC_ORIGIN, 0, 0);
164 glUniform2i(LOC_DESTINATION, 0, 0);
165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
166 glUniform1ui(LOC_PITCH, pitch);
167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
168 for (const SwizzleParameters& swizzle : swizzles) {
169 const Extent3D num_tiles = swizzle.num_tiles;
170 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
171
172 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
173 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
174
175 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
176 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
177 glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
178 }
179 program_manager.RestoreGuestCompute();
180}
181
182void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
183 static constexpr GLuint BINDING_INPUT_IMAGE = 0;
184 static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
185 static constexpr GLuint LOC_SRC_OFFSET = 0;
186 static constexpr GLuint LOC_DST_OFFSET = 1;
187
188 program_manager.BindHostCompute(copy_bc4_program.handle);
189
190 for (const ImageCopy& copy : copies) {
191 ASSERT(copy.src_subresource.base_layer == 0);
192 ASSERT(copy.src_subresource.num_layers == 1);
193 ASSERT(copy.dst_subresource.base_layer == 0);
194 ASSERT(copy.dst_subresource.num_layers == 1);
195
196 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
197 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
198 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
199 GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
200 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
201 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
202 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
203 }
204 program_manager.RestoreGuestCompute();
205}
206
207GLenum StoreFormat(u32 bytes_per_block) {
208 switch (bytes_per_block) {
209 case 1:
210 return GL_R8UI;
211 case 2:
212 return GL_R16UI;
213 case 4:
214 return GL_R32UI;
215 case 8:
216 return GL_RG32UI;
217 case 16:
218 return GL_RGBA32UI;
219 }
220 UNREACHABLE();
221 return GL_R8UI;
222}
223
224} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
new file mode 100644
index 000000000..359997255
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/texture_cache/types.h"
14
15namespace OpenGL {
16
17class Image;
18class ImageBufferMap;
19class ProgramManager;
20
21class UtilShaders {
22public:
23 explicit UtilShaders(ProgramManager& program_manager);
24 ~UtilShaders();
25
26 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
27 std::span<const VideoCommon::SwizzleParameters> swizzles);
28
29 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
30 std::span<const VideoCommon::SwizzleParameters> swizzles);
31
32 void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
33 std::span<const VideoCommon::SwizzleParameters> swizzles);
34
35 void CopyBC4(Image& dst_image, Image& src_image,
36 std::span<const VideoCommon::ImageCopy> copies);
37
38private:
39 ProgramManager& program_manager;
40
41 OGLBuffer swizzle_table_buffer;
42
43 OGLProgram block_linear_unswizzle_2d_program;
44 OGLProgram block_linear_unswizzle_3d_program;
45 OGLProgram pitch_unswizzle_program;
46 OGLProgram copy_bc4_program;
47};
48
49GLenum StoreFormat(u32 bytes_per_block);
50
51} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
deleted file mode 100644
index 6d7bb16b2..000000000
--- a/src/video_core/renderer_opengl/utils.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <vector>
7
8#include <fmt/format.h>
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
18 if (!GLAD_GL_KHR_debug) {
19 // We don't need to throw an error as this is just for debugging
20 return;
21 }
22
23 std::string object_label;
24 if (extra_info.empty()) {
25 switch (identifier) {
26 case GL_TEXTURE:
27 object_label = fmt::format("Texture@0x{:016X}", addr);
28 break;
29 case GL_PROGRAM:
30 object_label = fmt::format("Shader@0x{:016X}", addr);
31 break;
32 default:
33 object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
34 break;
35 }
36 } else {
37 object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
38 }
39 glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
40}
41
42} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
deleted file mode 100644
index 9c09ee12c..000000000
--- a/src/video_core/renderer_opengl/utils.h
+++ /dev/null
@@ -1,16 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string_view>
8#include <vector>
9#include <glad/glad.h>
10#include "common/common_types.h"
11
12namespace OpenGL {
13
14void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
15
16} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
new file mode 100644
index 000000000..1f6a169ae
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -0,0 +1,624 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
8#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
9#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
10#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
11#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
12#include "video_core/renderer_vulkan/blit_image.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h"
15#include "video_core/renderer_vulkan/vk_shader_util.h"
16#include "video_core/renderer_vulkan/vk_state_tracker.h"
17#include "video_core/renderer_vulkan/vk_texture_cache.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
19#include "video_core/surface.h"
20#include "video_core/vulkan_common/vulkan_device.h"
21#include "video_core/vulkan_common/vulkan_wrapper.h"
22
23namespace Vulkan {
24
25using VideoCommon::ImageViewType;
26
27namespace {
28struct PushConstants {
29 std::array<float, 2> tex_scale;
30 std::array<float, 2> tex_offset;
31};
32
33template <u32 binding>
34inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{
35 .binding = binding,
36 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
37 .descriptorCount = 1,
38 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
39 .pImmutableSamplers = nullptr,
40};
41constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
42 TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
43 TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>,
44};
45constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
46 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
47 .pNext = nullptr,
48 .flags = 0,
49 .bindingCount = 1,
50 .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
51};
52constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
54 .pNext = nullptr,
55 .flags = 0,
56 .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
57 .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
58};
59constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
60 .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
61 .offset = 0,
62 .size = sizeof(PushConstants),
63};
64constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
65 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
66 .pNext = nullptr,
67 .flags = 0,
68 .vertexBindingDescriptionCount = 0,
69 .pVertexBindingDescriptions = nullptr,
70 .vertexAttributeDescriptionCount = 0,
71 .pVertexAttributeDescriptions = nullptr,
72};
73constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
74 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
75 .pNext = nullptr,
76 .flags = 0,
77 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
78 .primitiveRestartEnable = VK_FALSE,
79};
80constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
81 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
82 .pNext = nullptr,
83 .flags = 0,
84 .viewportCount = 1,
85 .pViewports = nullptr,
86 .scissorCount = 1,
87 .pScissors = nullptr,
88};
89constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
90 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
91 .pNext = nullptr,
92 .flags = 0,
93 .depthClampEnable = VK_FALSE,
94 .rasterizerDiscardEnable = VK_FALSE,
95 .polygonMode = VK_POLYGON_MODE_FILL,
96 .cullMode = VK_CULL_MODE_BACK_BIT,
97 .frontFace = VK_FRONT_FACE_CLOCKWISE,
98 .depthBiasEnable = VK_FALSE,
99 .depthBiasConstantFactor = 0.0f,
100 .depthBiasClamp = 0.0f,
101 .depthBiasSlopeFactor = 0.0f,
102 .lineWidth = 1.0f,
103};
104constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
105 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
106 .pNext = nullptr,
107 .flags = 0,
108 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
109 .sampleShadingEnable = VK_FALSE,
110 .minSampleShading = 0.0f,
111 .pSampleMask = nullptr,
112 .alphaToCoverageEnable = VK_FALSE,
113 .alphaToOneEnable = VK_FALSE,
114};
115constexpr std::array DYNAMIC_STATES{
116 VK_DYNAMIC_STATE_VIEWPORT,
117 VK_DYNAMIC_STATE_SCISSOR,
118};
119constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
120 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
121 .pNext = nullptr,
122 .flags = 0,
123 .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
124 .pDynamicStates = DYNAMIC_STATES.data(),
125};
126constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
128 .pNext = nullptr,
129 .flags = 0,
130 .logicOpEnable = VK_FALSE,
131 .logicOp = VK_LOGIC_OP_CLEAR,
132 .attachmentCount = 0,
133 .pAttachments = nullptr,
134 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
135};
136constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
137 .blendEnable = VK_FALSE,
138 .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
139 .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
140 .colorBlendOp = VK_BLEND_OP_ADD,
141 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
142 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
143 .alphaBlendOp = VK_BLEND_OP_ADD,
144 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
145 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
146};
147constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
148 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
149 .pNext = nullptr,
150 .flags = 0,
151 .logicOpEnable = VK_FALSE,
152 .logicOp = VK_LOGIC_OP_CLEAR,
153 .attachmentCount = 1,
154 .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
155 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
156};
157constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
158 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
159 .pNext = nullptr,
160 .flags = 0,
161 .depthTestEnable = VK_TRUE,
162 .depthWriteEnable = VK_TRUE,
163 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
164 .depthBoundsTestEnable = VK_FALSE,
165 .stencilTestEnable = VK_FALSE,
166 .front = VkStencilOpState{},
167 .back = VkStencilOpState{},
168 .minDepthBounds = 0.0f,
169 .maxDepthBounds = 0.0f,
170};
171
172template <VkFilter filter>
173inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
174 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
175 .pNext = nullptr,
176 .flags = 0,
177 .magFilter = filter,
178 .minFilter = filter,
179 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
180 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
181 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
182 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
183 .mipLodBias = 0.0f,
184 .anisotropyEnable = VK_FALSE,
185 .maxAnisotropy = 0.0f,
186 .compareEnable = VK_FALSE,
187 .compareOp = VK_COMPARE_OP_NEVER,
188 .minLod = 0.0f,
189 .maxLod = 0.0f,
190 .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
191 .unnormalizedCoordinates = VK_TRUE,
192};
193
194constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
195 const VkDescriptorSetLayout* set_layout) {
196 return VkPipelineLayoutCreateInfo{
197 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
198 .pNext = nullptr,
199 .flags = 0,
200 .setLayoutCount = 1,
201 .pSetLayouts = set_layout,
202 .pushConstantRangeCount = 1,
203 .pPushConstantRanges = &PUSH_CONSTANT_RANGE,
204 };
205}
206
207constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,
208 VkShaderModule shader) {
209 return VkPipelineShaderStageCreateInfo{
210 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
211 .pNext = nullptr,
212 .flags = 0,
213 .stage = stage,
214 .module = shader,
215 .pName = "main",
216 .pSpecializationInfo = nullptr,
217 };
218}
219
220constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
221 VkShaderModule vertex_shader, VkShaderModule fragment_shader) {
222 return std::array{
223 PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader),
224 PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader),
225 };
226}
227
228void UpdateOneTextureDescriptorSet(const Device& device, VkDescriptorSet descriptor_set,
229 VkSampler sampler, VkImageView image_view) {
230 const VkDescriptorImageInfo image_info{
231 .sampler = sampler,
232 .imageView = image_view,
233 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
234 };
235 const VkWriteDescriptorSet write_descriptor_set{
236 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
237 .pNext = nullptr,
238 .dstSet = descriptor_set,
239 .dstBinding = 0,
240 .dstArrayElement = 0,
241 .descriptorCount = 1,
242 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
243 .pImageInfo = &image_info,
244 .pBufferInfo = nullptr,
245 .pTexelBufferView = nullptr,
246 };
247 device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
248}
249
250void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descriptor_set,
251 VkSampler sampler, VkImageView image_view_0,
252 VkImageView image_view_1) {
253 const VkDescriptorImageInfo image_info_0{
254 .sampler = sampler,
255 .imageView = image_view_0,
256 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
257 };
258 const VkDescriptorImageInfo image_info_1{
259 .sampler = sampler,
260 .imageView = image_view_1,
261 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
262 };
263 const std::array write_descriptor_sets{
264 VkWriteDescriptorSet{
265 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
266 .pNext = nullptr,
267 .dstSet = descriptor_set,
268 .dstBinding = 0,
269 .dstArrayElement = 0,
270 .descriptorCount = 1,
271 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
272 .pImageInfo = &image_info_0,
273 .pBufferInfo = nullptr,
274 .pTexelBufferView = nullptr,
275 },
276 VkWriteDescriptorSet{
277 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
278 .pNext = nullptr,
279 .dstSet = descriptor_set,
280 .dstBinding = 1,
281 .dstArrayElement = 0,
282 .descriptorCount = 1,
283 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
284 .pImageInfo = &image_info_1,
285 .pBufferInfo = nullptr,
286 .pTexelBufferView = nullptr,
287 },
288 };
289 device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
290}
291
292void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
293 const std::array<Offset2D, 2>& dst_region,
294 const std::array<Offset2D, 2>& src_region) {
295 const VkOffset2D offset{
296 .x = std::min(dst_region[0].x, dst_region[1].x),
297 .y = std::min(dst_region[0].y, dst_region[1].y),
298 };
299 const VkExtent2D extent{
300 .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)),
301 .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)),
302 };
303 const VkViewport viewport{
304 .x = static_cast<float>(offset.x),
305 .y = static_cast<float>(offset.y),
306 .width = static_cast<float>(extent.width),
307 .height = static_cast<float>(extent.height),
308 .minDepth = 0.0f,
309 .maxDepth = 1.0f,
310 };
311 // TODO: Support scissored blits
312 const VkRect2D scissor{
313 .offset = offset,
314 .extent = extent,
315 };
316 const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x);
317 const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y);
318 const PushConstants push_constants{
319 .tex_scale = {scale_x, scale_y},
320 .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)},
321 };
322 cmdbuf.SetViewport(0, viewport);
323 cmdbuf.SetScissor(0, scissor);
324 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
325}
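// The push constants map the full-screen triangle's generated coordinate onto
// the source rectangle: with scale = x1 - x0 and offset = x0, u in [0, 1]
// sweeps exactly [x0, x1] in unnormalized texels (the samplers above set
// unnormalizedCoordinates = VK_TRUE). A sketch of that mapping per axis:
constexpr float SampleCoord(float u, float x0, float x1) {
    return u * (x1 - x0) + x0;
}
static_assert(SampleCoord(0.0f, 10.0f, 74.0f) == 10.0f);
static_assert(SampleCoord(1.0f, 10.0f, 74.0f) == 74.0f);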
326
327} // Anonymous namespace
328
329BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
330 StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
331 : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
332 one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
333 ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
334 two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
335 TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
336 one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
337 two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
338 one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
339 PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
340 two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
341 PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
342 full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
343 blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
344 convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
345 convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
346 linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
347 nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
348 if (device.IsExtShaderStencilExportSupported()) {
349 blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
350 }
351}
352
353BlitImageHelper::~BlitImageHelper() = default;
354
355void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
356 const std::array<Offset2D, 2>& dst_region,
357 const std::array<Offset2D, 2>& src_region,
358 Tegra::Engines::Fermi2D::Filter filter,
359 Tegra::Engines::Fermi2D::Operation operation) {
360 const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
361 const BlitImagePipelineKey key{
362 .renderpass = dst_framebuffer->RenderPass(),
363 .operation = operation,
364 };
365 const VkPipelineLayout layout = *one_texture_pipeline_layout;
366 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
367 const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
368 const VkPipeline pipeline = FindOrEmplacePipeline(key);
369 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
370 scheduler.RequestRenderpass(dst_framebuffer);
371 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
372 &device = device](vk::CommandBuffer cmdbuf) {
373 // TODO: Barriers
374 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
375 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
376 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
377 nullptr);
378 BindBlitState(cmdbuf, layout, dst_region, src_region);
379 cmdbuf.Draw(3, 1, 0, 0);
380 });
381 scheduler.InvalidateState();
382}
383
384void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
385 VkImageView src_depth_view, VkImageView src_stencil_view,
386 const std::array<Offset2D, 2>& dst_region,
387 const std::array<Offset2D, 2>& src_region,
388 Tegra::Engines::Fermi2D::Filter filter,
389 Tegra::Engines::Fermi2D::Operation operation) {
390 ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
391 ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
392
393 const VkPipelineLayout layout = *two_textures_pipeline_layout;
394 const VkSampler sampler = *nearest_sampler;
395 const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
396 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
397 scheduler.RequestRenderpass(dst_framebuffer);
398 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
399 src_stencil_view, descriptor_set,
400 &device = device](vk::CommandBuffer cmdbuf) {
401 // TODO: Barriers
402 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
403 src_stencil_view);
404 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
405 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
406 nullptr);
407 BindBlitState(cmdbuf, layout, dst_region, src_region);
408 cmdbuf.Draw(3, 1, 0, 0);
409 });
410 scheduler.InvalidateState();
411}
412
413void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
414 const ImageView& src_image_view) {
415 ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
416 Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
417}
418
419void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
420 const ImageView& src_image_view) {
421
422 ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
423 Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
424}
425
426void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
427 const ImageView& src_image_view) {
428 ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
429 Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
430}
431
432void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
433 const ImageView& src_image_view) {
434 ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
435 Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
436}
437
438void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
439 const ImageView& src_image_view) {
440 const VkPipelineLayout layout = *one_texture_pipeline_layout;
441 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
442 const VkSampler sampler = *nearest_sampler;
443 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
444 const VkExtent2D extent{
445 .width = src_image_view.size.width,
446 .height = src_image_view.size.height,
447 };
448 scheduler.RequestRenderpass(dst_framebuffer);
449 scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
450 &device = device](vk::CommandBuffer cmdbuf) {
451 const VkOffset2D offset{
452 .x = 0,
453 .y = 0,
454 };
455 const VkViewport viewport{
456 .x = 0.0f,
457 .y = 0.0f,
458 .width = static_cast<float>(extent.width),
459 .height = static_cast<float>(extent.height),
460 .minDepth = 0.0f,
461 .maxDepth = 1.0f, // match BindBlitState; a zero-sized depth range would clamp converted depth to 0
462 };
463 const VkRect2D scissor{
464 .offset = offset,
465 .extent = extent,
466 };
467 const PushConstants push_constants{
468 .tex_scale = {viewport.width, viewport.height},
469 .tex_offset = {0.0f, 0.0f},
470 };
471 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
472
473 // TODO: Barriers
474 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
475 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
476 nullptr);
477 cmdbuf.SetViewport(0, viewport);
478 cmdbuf.SetScissor(0, scissor);
479 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
480 cmdbuf.Draw(3, 1, 0, 0);
481 });
482 scheduler.InvalidateState();
483}
484
485VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
486 const auto it = std::ranges::find(blit_color_keys, key);
487 if (it != blit_color_keys.end()) {
488 return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
489 }
490 blit_color_keys.push_back(key);
491
492 const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag);
493 const VkPipelineColorBlendAttachmentState blend_attachment{
494 .blendEnable = VK_FALSE,
495 .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
496 .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
497 .colorBlendOp = VK_BLEND_OP_ADD,
498 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
499 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
500 .alphaBlendOp = VK_BLEND_OP_ADD,
501 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
502 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
503 };
504 // TODO: programmable blending
505 const VkPipelineColorBlendStateCreateInfo color_blend_create_info{
506 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
507 .pNext = nullptr,
508 .flags = 0,
509 .logicOpEnable = VK_FALSE,
510 .logicOp = VK_LOGIC_OP_CLEAR,
511 .attachmentCount = 1,
512 .pAttachments = &blend_attachment,
513 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
514 };
515 blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
516 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
517 .pNext = nullptr,
518 .flags = 0,
519 .stageCount = static_cast<u32>(stages.size()),
520 .pStages = stages.data(),
521 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
522 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
523 .pTessellationState = nullptr,
524 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
525 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
526 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
527 .pDepthStencilState = nullptr,
528 .pColorBlendState = &color_blend_create_info,
529 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
530 .layout = *one_texture_pipeline_layout,
531 .renderPass = key.renderpass,
532 .subpass = 0,
533 .basePipelineHandle = VK_NULL_HANDLE,
534 .basePipelineIndex = 0,
535 }));
536 return *blit_color_pipelines.back();
537}
538
539VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
540 if (blit_depth_stencil_pipeline) {
541 return *blit_depth_stencil_pipeline;
542 }
543 const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
544 blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
545 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
546 .pNext = nullptr,
547 .flags = 0,
548 .stageCount = static_cast<u32>(stages.size()),
549 .pStages = stages.data(),
550 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
551 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
552 .pTessellationState = nullptr,
553 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
554 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
555 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
556 .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
557 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
558 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
559 .layout = *two_textures_pipeline_layout,
560 .renderPass = renderpass,
561 .subpass = 0,
562 .basePipelineHandle = VK_NULL_HANDLE,
563 .basePipelineIndex = 0,
564 });
565 return *blit_depth_stencil_pipeline;
566}
567
568void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
569 if (pipeline) {
570 return;
571 }
572 const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
573 pipeline = device.GetLogical().CreateGraphicsPipeline({
574 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
575 .pNext = nullptr,
576 .flags = 0,
577 .stageCount = static_cast<u32>(stages.size()),
578 .pStages = stages.data(),
579 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
580 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
581 .pTessellationState = nullptr,
582 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
583 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
584 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
585 .pDepthStencilState = nullptr,
586 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
587 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
588 .layout = *one_texture_pipeline_layout,
589 .renderPass = renderpass,
590 .subpass = 0,
591 .basePipelineHandle = VK_NULL_HANDLE,
592 .basePipelineIndex = 0,
593 });
594}
595
596void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
597 if (pipeline) {
598 return;
599 }
600 const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
601 pipeline = device.GetLogical().CreateGraphicsPipeline({
602 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
603 .pNext = nullptr,
604 .flags = 0,
605 .stageCount = static_cast<u32>(stages.size()),
606 .pStages = stages.data(),
607 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
608 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
609 .pTessellationState = nullptr,
610 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
611 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
612 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
613 .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
614 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
615 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
616 .layout = *one_texture_pipeline_layout,
617 .renderPass = renderpass,
618 .subpass = 0,
619 .basePipelineHandle = VK_NULL_HANDLE,
620 .basePipelineIndex = 0,
621 });
622}
623
624} // namespace Vulkan
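
Note: the push constants computed in BindBlitState drive a full-screen triangle; the viewport and scissor select the destination rectangle, while tex_scale and tex_offset describe the source rectangle in texels for the vertex stage. Below is a minimal standalone sketch of that region math only, with a plain Offset2D standing in for VideoCommon::Offset2D and example regions invented for illustration:

    #include <algorithm>
    #include <cstdio>
    #include <cstdlib>

    struct Offset2D {
        int x, y;
    };

    int main() {
        // Hypothetical blit: source texels [32,16)..[96,80) onto destination pixels [0,0)..[128,128)
        const Offset2D dst[2]{{0, 0}, {128, 128}};
        const Offset2D src[2]{{32, 16}, {96, 80}};
        // Destination rectangle: min corner plus absolute extent, as in BindBlitState
        const int x = std::min(dst[0].x, dst[1].x);
        const int y = std::min(dst[0].y, dst[1].y);
        const int width = std::abs(dst[1].x - dst[0].x);
        const int height = std::abs(dst[1].y - dst[0].y);
        // Push constants: source extent becomes the scale, source origin the offset
        const float scale_x = static_cast<float>(src[1].x - src[0].x);
        const float scale_y = static_cast<float>(src[1].y - src[0].y);
        const float offset_x = static_cast<float>(src[0].x);
        const float offset_y = static_cast<float>(src[0].y);
        std::printf("viewport %dx%d at (%d,%d), tex_scale=(%g,%g), tex_offset=(%g,%g)\n",
                    width, height, x, y, scale_x, scale_y, offset_x, offset_y);
    }

Taking the min corner and an absolute extent, while leaving the signed source extent in the scale, is what lets the same path express mirrored blits.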
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
new file mode 100644
index 000000000..43fd3d737
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -0,0 +1,96 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8
9#include "video_core/engines/fermi_2d.h"
10#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/texture_cache/types.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
13
14namespace Vulkan {
15
16using VideoCommon::Offset2D;
17
18class Device;
19class Framebuffer;
20class ImageView;
21class StateTracker;
22class VKScheduler;
23
24struct BlitImagePipelineKey {
25 constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
26
27 VkRenderPass renderpass;
28 Tegra::Engines::Fermi2D::Operation operation;
29};
30
31class BlitImageHelper {
32public:
33 explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
34 StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
35 ~BlitImageHelper();
36
37 void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
38 const std::array<Offset2D, 2>& dst_region,
39 const std::array<Offset2D, 2>& src_region,
40 Tegra::Engines::Fermi2D::Filter filter,
41 Tegra::Engines::Fermi2D::Operation operation);
42
43 void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
44 VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
45 const std::array<Offset2D, 2>& src_region,
46 Tegra::Engines::Fermi2D::Filter filter,
47 Tegra::Engines::Fermi2D::Operation operation);
48
49 void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
50
51 void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
52
53 void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
54
55 void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
56
57private:
58 void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
59 const ImageView& src_image_view);
60
61 [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
62
63 [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
64
65 void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
66
67 void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
68
69 const Device& device;
70 VKScheduler& scheduler;
71 StateTracker& state_tracker;
72
73 vk::DescriptorSetLayout one_texture_set_layout;
74 vk::DescriptorSetLayout two_textures_set_layout;
75 DescriptorAllocator one_texture_descriptor_allocator;
76 DescriptorAllocator two_textures_descriptor_allocator;
77 vk::PipelineLayout one_texture_pipeline_layout;
78 vk::PipelineLayout two_textures_pipeline_layout;
79 vk::ShaderModule full_screen_vert;
80 vk::ShaderModule blit_color_to_color_frag;
81 vk::ShaderModule blit_depth_stencil_frag;
82 vk::ShaderModule convert_depth_to_float_frag;
83 vk::ShaderModule convert_float_to_depth_frag;
84 vk::Sampler linear_sampler;
85 vk::Sampler nearest_sampler;
86
87 std::vector<BlitImagePipelineKey> blit_color_keys;
88 std::vector<vk::Pipeline> blit_color_pipelines;
89 vk::Pipeline blit_depth_stencil_pipeline;
90 vk::Pipeline convert_d32_to_r32_pipeline;
91 vk::Pipeline convert_r32_to_d32_pipeline;
92 vk::Pipeline convert_d16_to_r16_pipeline;
93 vk::Pipeline convert_r16_to_d16_pipeline;
94};
95
96} // namespace Vulkan
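
Note: blit_color_keys and blit_color_pipelines form a small linear cache. The defaulted operator<=> gives BlitImagePipelineKey the equality std::ranges::find needs, and the two vectors stay index-aligned. A sketch of the pattern, assuming an int may stand in for vk::Pipeline:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <iterator>
    #include <vector>

    struct Key {
        int renderpass{}; // stand-in for VkRenderPass
        int operation{};  // stand-in for Fermi2D::Operation
        constexpr auto operator<=>(const Key&) const noexcept = default;
    };

    // keys[i] describes pipelines[i]; linear search is cheap for the handful of
    // renderpass/operation combinations a frame actually uses.
    int FindOrEmplace(std::vector<Key>& keys, std::vector<int>& pipelines, const Key& key) {
        if (const auto it = std::ranges::find(keys, key); it != keys.end()) {
            return pipelines[static_cast<std::size_t>(std::distance(keys.begin(), it))];
        }
        keys.push_back(key);
        pipelines.push_back(100 + static_cast<int>(pipelines.size())); // build a new "pipeline"
        return pipelines.back();
    }

    int main() {
        std::vector<Key> keys;
        std::vector<int> pipelines;
        std::printf("%d\n", FindOrEmplace(keys, pipelines, {.renderpass = 1, .operation = 0})); // miss: 100
        std::printf("%d\n", FindOrEmplace(keys, pipelines, {.renderpass = 1, .operation = 0})); // hit: 100
    }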
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5ec43db11..5be6dabd9 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
60 logic_op.Assign(PackLogicOp(regs.logic_op.operation)); 60 logic_op.Assign(PackLogicOp(regs.logic_op.operation));
61 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); 61 rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
62 topology.Assign(regs.draw.topology); 62 topology.Assign(regs.draw.topology);
63 msaa_mode.Assign(regs.multisample_mode);
63 64
64 raw2 = 0; 65 raw2 = 0;
65 const auto test_func = 66 const auto test_func =
@@ -75,7 +76,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
75 regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; 76 regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0;
76 } 77 }
77 78
78 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 79 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
79 const auto& input = regs.vertex_attrib_format[index]; 80 const auto& input = regs.vertex_attrib_format[index];
80 auto& attribute = attributes[index]; 81 auto& attribute = attributes[index];
81 attribute.raw = 0; 82 attribute.raw = 0;
@@ -84,6 +85,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
84 attribute.offset.Assign(input.offset); 85 attribute.offset.Assign(input.offset);
85 attribute.type.Assign(static_cast<u32>(input.type.Value())); 86 attribute.type.Assign(static_cast<u32>(input.type.Value()));
86 attribute.size.Assign(static_cast<u32>(input.size.Value())); 87 attribute.size.Assign(static_cast<u32>(input.size.Value()));
88 attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0);
87 } 89 }
88 90
89 for (std::size_t index = 0; index < std::size(attachments); ++index) { 91 for (std::size_t index = 0; index < std::size(attachments); ++index) {
@@ -171,14 +173,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
171 depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); 173 depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
172 cull_face.Assign(PackCullFace(regs.cull_face)); 174 cull_face.Assign(PackCullFace(regs.cull_face));
173 cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); 175 cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
174 176 std::ranges::transform(regs.vertex_array, vertex_strides.begin(), [](const auto& array) {
175 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 177 return static_cast<u16>(array.stride.Value());
176 const auto& input = regs.vertex_array[index]; 178 });
177 VertexBinding& binding = vertex_bindings[index];
178 binding.raw = 0;
179 binding.enabled.Assign(input.IsEnabled() ? 1 : 0);
180 binding.stride.Assign(static_cast<u16>(input.stride.Value()));
181 }
182} 179}
183 180
184std::size_t FixedPipelineState::Hash() const noexcept { 181std::size_t FixedPipelineState::Hash() const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index c26b77790..465a55fdb 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -96,6 +96,8 @@ struct FixedPipelineState {
96 BitField<6, 14, u32> offset; 96 BitField<6, 14, u32> offset;
97 BitField<20, 3, u32> type; 97 BitField<20, 3, u32> type;
98 BitField<23, 6, u32> size; 98 BitField<23, 6, u32> size;
99 // Not really an element of a vertex attribute, but it can be packed here
100 BitField<29, 1, u32> binding_index_enabled;
99 101
100 constexpr Maxwell::VertexAttribute::Type Type() const noexcept { 102 constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
101 return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); 103 return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
@@ -130,12 +132,6 @@ struct FixedPipelineState {
130 } 132 }
131 }; 133 };
132 134
133 union VertexBinding {
134 u16 raw;
135 BitField<0, 12, u16> stride;
136 BitField<12, 1, u16> enabled;
137 };
138
139 struct DynamicState { 135 struct DynamicState {
140 union { 136 union {
141 u32 raw1; 137 u32 raw1;
@@ -153,7 +149,8 @@ struct FixedPipelineState {
153 BitField<0, 2, u32> cull_face; 149 BitField<0, 2, u32> cull_face;
154 BitField<2, 1, u32> cull_enable; 150 BitField<2, 1, u32> cull_enable;
155 }; 151 };
156 std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings; 152 // Vertex stride is a 12-bit value; we have 4 bits to spare per element
153 std::array<u16, Maxwell::NumVertexArrays> vertex_strides;
157 154
158 void Fill(const Maxwell& regs); 155 void Fill(const Maxwell& regs);
159 156
@@ -186,6 +183,7 @@ struct FixedPipelineState {
186 BitField<19, 4, u32> logic_op; 183 BitField<19, 4, u32> logic_op;
187 BitField<23, 1, u32> rasterize_enable; 184 BitField<23, 1, u32> rasterize_enable;
188 BitField<24, 4, Maxwell::PrimitiveTopology> topology; 185 BitField<24, 4, Maxwell::PrimitiveTopology> topology;
186 BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
189 }; 187 };
190 union { 188 union {
191 u32 raw2; 189 u32 raw2;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 58e117eb3..ca7c2c579 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -9,9 +9,9 @@
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_vulkan/maxwell_to_vk.h" 11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/wrapper.h"
14#include "video_core/surface.h" 12#include "video_core/surface.h"
13#include "video_core/vulkan_common/vulkan_device.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h"
15 15
16namespace Vulkan::MaxwellToVK { 16namespace Vulkan::MaxwellToVK {
17 17
@@ -47,7 +47,7 @@ VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter
47 return {}; 47 return {};
48} 48}
49 49
50VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, 50VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
51 Tegra::Texture::TextureFilter filter) { 51 Tegra::Texture::TextureFilter filter) {
52 switch (wrap_mode) { 52 switch (wrap_mode) {
53 case Tegra::Texture::WrapMode::Wrap: 53 case Tegra::Texture::WrapMode::Wrap:
@@ -122,7 +122,7 @@ struct FormatTuple {
122 {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT 122 {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
123 {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT 123 {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
124 {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM 124 {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
125 {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM 125 {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM
126 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM 126 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
127 {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM 127 {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
128 {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT 128 {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
@@ -163,7 +163,7 @@ struct FormatTuple {
163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM 163 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
164 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT 164 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
165 {VK_FORMAT_UNDEFINED}, // R16G16_UINT 165 {VK_FORMAT_UNDEFINED}, // R16G16_UINT
166 {VK_FORMAT_UNDEFINED}, // R16G16_SINT 166 {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
167 {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM 167 {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
168 {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT 168 {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
169 {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB 169 {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
@@ -222,7 +222,7 @@ constexpr bool IsZetaFormat(PixelFormat pixel_format) {
222 222
223} // Anonymous namespace 223} // Anonymous namespace
224 224
225FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format) { 225FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format) {
226 ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); 226 ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));
227 227
228 auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; 228 auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
@@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
233 233
234 // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively 234 // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
235 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { 235 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
236 tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) 236 const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
237 ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 237 tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
238 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
239 } 238 }
240 const bool attachable = tuple.usage & Attachable; 239 const bool attachable = tuple.usage & Attachable;
241 const bool storage = tuple.usage & Storage; 240 const bool storage = tuple.usage & Storage;
242 241
243 VkFormatFeatureFlags usage; 242 VkFormatFeatureFlags usage{};
244 if (format_type == FormatType::Buffer) { 243 switch (format_type) {
244 case FormatType::Buffer:
245 usage = 245 usage =
246 VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; 246 VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
247 } else { 247 break;
248 case FormatType::Linear:
249 case FormatType::Optimal:
248 usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | 250 usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
249 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; 251 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
250 if (attachable) { 252 if (attachable) {
@@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
254 if (storage) { 256 if (storage) {
255 usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; 257 usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
256 } 258 }
259 break;
257 } 260 }
258 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; 261 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
259} 262}
@@ -277,7 +280,7 @@ VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
277 return {}; 280 return {};
278} 281}
279 282
280VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, 283VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device,
281 Maxwell::PrimitiveTopology topology) { 284 Maxwell::PrimitiveTopology topology) {
282 switch (topology) { 285 switch (topology) {
283 case Maxwell::PrimitiveTopology::Points: 286 case Maxwell::PrimitiveTopology::Points:
@@ -523,7 +526,7 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
523 return {}; 526 return {};
524} 527}
525 528
526VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { 529VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) {
527 switch (index_format) { 530 switch (index_format) {
528 case Maxwell::IndexFormat::UnsignedByte: 531 case Maxwell::IndexFormat::UnsignedByte:
529 if (!device.IsExtIndexTypeUint8Supported()) { 532 if (!device.IsExtIndexTypeUint8Supported()) {
@@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)
724 return {}; 727 return {};
725} 728}
726 729
730VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
731 switch (reduction) {
732 case Tegra::Texture::SamplerReduction::WeightedAverage:
733 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
734 case Tegra::Texture::SamplerReduction::Min:
735 return VK_SAMPLER_REDUCTION_MODE_MIN_EXT;
736 case Tegra::Texture::SamplerReduction::Max:
737 return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
738 }
739 UNREACHABLE_MSG("Invalid sampler reduction mode={}", static_cast<int>(reduction));
740 return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
741}
742
727} // namespace Vulkan::MaxwellToVK 743} // namespace Vulkan::MaxwellToVK
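
Note: SamplerReduction's result is meant to be chained into a sampler through VkSamplerReductionModeCreateInfoEXT (VK_EXT_sampler_filter_minmax). The actual consumer lives outside this diff, so the following is only a sketch of the usual pNext wiring, with a hypothetical helper name:

    #include <vulkan/vulkan.h>

    // Hypothetical helper: build a linear sampler whose filtering reduces with min()
    // instead of a weighted average, e.g. for Tegra::Texture::SamplerReduction::Min.
    VkSamplerCreateInfo MakeMinReductionSampler(VkSamplerReductionModeCreateInfoEXT& reduction) {
        reduction = {
            .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT,
            .pNext = nullptr,
            .reductionMode = VK_SAMPLER_REDUCTION_MODE_MIN_EXT,
        };
        return VkSamplerCreateInfo{
            .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
            .pNext = &reduction, // extension struct rides along on the pNext chain
            .magFilter = VK_FILTER_LINEAR,
            .minFilter = VK_FILTER_LINEAR,
        };
    }

The device must advertise VK_EXT_sampler_filter_minmax before vkCreateSampler will accept this chain.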
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e213452f..537969840 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -6,10 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/wrapper.h"
11#include "video_core/surface.h" 9#include "video_core/surface.h"
12#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
11#include "video_core/vulkan_common/vulkan_device.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
13 13
14namespace Vulkan::MaxwellToVK { 14namespace Vulkan::MaxwellToVK {
15 15
@@ -22,7 +22,7 @@ VkFilter Filter(Tegra::Texture::TextureFilter filter);
22 22
23VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); 23VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
24 24
25VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, 25VkSamplerAddressMode WrapMode(const Device& device, Tegra::Texture::WrapMode wrap_mode,
26 Tegra::Texture::TextureFilter filter); 26 Tegra::Texture::TextureFilter filter);
27 27
28VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); 28VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
@@ -35,17 +35,17 @@ struct FormatInfo {
35 bool storage; 35 bool storage;
36}; 36};
37 37
38FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); 38FormatInfo SurfaceFormat(const Device& device, FormatType format_type, PixelFormat pixel_format);
39 39
40VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); 40VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
41 41
42VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology); 42VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
43 43
44VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); 44VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
45 45
46VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); 46VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
47 47
48VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); 48VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format);
49 49
50VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); 50VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
51 51
@@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
61 61
62VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); 62VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
63 63
64VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
65
64} // namespace Vulkan::MaxwellToVK 66} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index ea4b7c1e6..d7437e185 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -12,8 +12,6 @@
12 12
13#include <fmt/format.h> 13#include <fmt/format.h>
14 14
15#include "common/dynamic_library.h"
16#include "common/file_util.h"
17#include "common/logging/log.h" 15#include "common/logging/log.h"
18#include "common/telemetry.h" 16#include "common/telemetry.h"
19#include "core/core.h" 17#include "core/core.h"
@@ -24,182 +22,27 @@
24#include "video_core/gpu.h" 22#include "video_core/gpu.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h" 23#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_blit_screen.h" 24#include "video_core/renderer_vulkan/vk_blit_screen.h"
27#include "video_core/renderer_vulkan/vk_device.h"
28#include "video_core/renderer_vulkan/vk_master_semaphore.h" 25#include "video_core/renderer_vulkan/vk_master_semaphore.h"
29#include "video_core/renderer_vulkan/vk_memory_manager.h" 26#include "video_core/renderer_vulkan/vk_memory_manager.h"
30#include "video_core/renderer_vulkan/vk_rasterizer.h" 27#include "video_core/renderer_vulkan/vk_rasterizer.h"
31#include "video_core/renderer_vulkan/vk_scheduler.h" 28#include "video_core/renderer_vulkan/vk_scheduler.h"
32#include "video_core/renderer_vulkan/vk_state_tracker.h" 29#include "video_core/renderer_vulkan/vk_state_tracker.h"
33#include "video_core/renderer_vulkan/vk_swapchain.h" 30#include "video_core/renderer_vulkan/vk_swapchain.h"
34#include "video_core/renderer_vulkan/wrapper.h" 31#include "video_core/vulkan_common/vulkan_debug_callback.h"
35 32#include "video_core/vulkan_common/vulkan_device.h"
36// Include these late to avoid polluting previous headers 33#include "video_core/vulkan_common/vulkan_instance.h"
37#ifdef _WIN32 34#include "video_core/vulkan_common/vulkan_library.h"
38#include <windows.h> 35#include "video_core/vulkan_common/vulkan_surface.h"
39// ensure include order 36#include "video_core/vulkan_common/vulkan_wrapper.h"
40#include <vulkan/vulkan_win32.h>
41#endif
42
43#if !defined(_WIN32) && !defined(__APPLE__)
44#include <X11/Xlib.h>
45#include <vulkan/vulkan_wayland.h>
46#include <vulkan/vulkan_xlib.h>
47#endif
48 37
49namespace Vulkan { 38namespace Vulkan {
50
51namespace { 39namespace {
52
53using Core::Frontend::WindowSystemType;
54
55VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
56 VkDebugUtilsMessageTypeFlagsEXT type,
57 const VkDebugUtilsMessengerCallbackDataEXT* data,
58 [[maybe_unused]] void* user_data) {
59 const char* const message{data->pMessage};
60
61 if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
62 LOG_CRITICAL(Render_Vulkan, "{}", message);
63 } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
64 LOG_WARNING(Render_Vulkan, "{}", message);
65 } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
66 LOG_INFO(Render_Vulkan, "{}", message);
67 } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
68 LOG_DEBUG(Render_Vulkan, "{}", message);
69 }
70 return VK_FALSE;
71}
72
73Common::DynamicLibrary OpenVulkanLibrary() {
74 Common::DynamicLibrary library;
75#ifdef __APPLE__
76 // Check if a path to a specific Vulkan library has been specified.
77 char* libvulkan_env = getenv("LIBVULKAN_PATH");
78 if (!libvulkan_env || !library.Open(libvulkan_env)) {
79 // Use the libvulkan.dylib from the application bundle.
80 const std::string filename =
81 Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
82 library.Open(filename.c_str());
83 }
84#else
85 std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
86 if (!library.Open(filename.c_str())) {
87 // Android devices may not have libvulkan.so.1, only libvulkan.so.
88 filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
89 (void)library.Open(filename.c_str());
90 }
91#endif
92 return library;
93}
94
95std::pair<vk::Instance, u32> CreateInstance(
96 Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
97 WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) {
98 if (!library.IsOpen()) {
99 LOG_ERROR(Render_Vulkan, "Vulkan library not available");
100 return {};
101 }
102 if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
103 LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
104 return {};
105 }
106 if (!vk::Load(dld)) {
107 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
108 return {};
109 }
110
111 std::vector<const char*> extensions;
112 extensions.reserve(6);
113 switch (window_type) {
114 case Core::Frontend::WindowSystemType::Headless:
115 break;
116#ifdef _WIN32
117 case Core::Frontend::WindowSystemType::Windows:
118 extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
119 break;
120#endif
121#if !defined(_WIN32) && !defined(__APPLE__)
122 case Core::Frontend::WindowSystemType::X11:
123 extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
124 break;
125 case Core::Frontend::WindowSystemType::Wayland:
126 extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
127 break;
128#endif
129 default:
130 LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
131 break;
132 }
133 if (window_type != Core::Frontend::WindowSystemType::Headless) {
134 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
135 }
136 if (enable_layers) {
137 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
138 }
139 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
140
141 const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
142 if (!properties) {
143 LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
144 return {};
145 }
146
147 for (const char* extension : extensions) {
148 const auto it =
149 std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) {
150 return !std::strcmp(extension, prop.extensionName);
151 });
152 if (it == properties->end()) {
153 LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
154 return {};
155 }
156 }
157
158 std::vector<const char*> layers;
159 layers.reserve(1);
160 if (enable_layers) {
161 layers.push_back("VK_LAYER_KHRONOS_validation");
162 }
163
164 const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
165 if (!layer_properties) {
166 LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
167 layers.clear();
168 }
169
170 for (auto layer_it = layers.begin(); layer_it != layers.end();) {
171 const char* const layer = *layer_it;
172 const auto it = std::find_if(
173 layer_properties->begin(), layer_properties->end(),
174 [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
175 if (it == layer_properties->end()) {
176 LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
177 layer_it = layers.erase(layer_it);
178 } else {
179 ++layer_it;
180 }
181 }
182
183 // Limit the maximum version of Vulkan to avoid using untested versions.
184 const u32 version = std::min(vk::AvailableVersion(dld), static_cast<u32>(VK_API_VERSION_1_1));
185
186 vk::Instance instance = vk::Instance::Create(version, layers, extensions, dld);
187 if (!instance) {
188 LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
189 return {};
190 }
191 if (!vk::Load(*instance, dld)) {
192 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
193 }
194 return std::make_pair(std::move(instance), version);
195}
196
197std::string GetReadableVersion(u32 version) { 40std::string GetReadableVersion(u32 version) {
198 return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), 41 return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
199 VK_VERSION_PATCH(version)); 42 VK_VERSION_PATCH(version));
200} 43}
201 44
202std::string GetDriverVersion(const VKDevice& device) { 45std::string GetDriverVersion(const Device& device) {
203 // Extracted from 46 // Extracted from
204 // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 47 // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
205 const u32 version = device.GetDriverVersion(); 48 const u32 version = device.GetDriverVersion();
@@ -216,7 +59,6 @@ std::string GetDriverVersion(const VKDevice& device) {
216 const u32 minor = version & 0x3fff; 59 const u32 minor = version & 0x3fff;
217 return fmt::format("{}.{}", major, minor); 60 return fmt::format("{}.{}", major, minor);
218 } 61 }
219
220 return GetReadableVersion(version); 62 return GetReadableVersion(version);
221} 63}
222 64
@@ -255,7 +97,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
255 if (!framebuffer) { 97 if (!framebuffer) {
256 return; 98 return;
257 } 99 }
258
259 const auto& layout = render_window.GetFramebufferLayout(); 100 const auto& layout = render_window.GetFramebufferLayout();
260 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { 101 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
261 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 102 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
@@ -284,14 +125,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
284 render_window.OnFrameDisplayed(); 125 render_window.OnFrameDisplayed();
285} 126}
286 127
287bool RendererVulkan::Init() { 128bool RendererVulkan::Init() try {
288 library = OpenVulkanLibrary(); 129 library = OpenLibrary();
289 std::tie(instance, instance_version) = CreateInstance( 130 instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
290 library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug); 131 true, Settings::values.renderer_debug);
291 if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { 132 if (Settings::values.renderer_debug) {
292 return false; 133 debug_callback = CreateDebugCallback(instance);
293 } 134 }
135 surface = CreateSurface(instance, render_window);
294 136
137 InitializeDevice();
295 Report(); 138 Report();
296 139
297 memory_manager = std::make_unique<VKMemoryManager>(*device); 140 memory_manager = std::make_unique<VKMemoryManager>(*device);
@@ -311,8 +154,11 @@ bool RendererVulkan::Init() {
311 blit_screen = 154 blit_screen =
312 std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, 155 std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
313 *memory_manager, *swapchain, *scheduler, screen_info); 156 *memory_manager, *swapchain, *scheduler, screen_info);
314
315 return true; 157 return true;
158
159} catch (const vk::Exception& exception) {
160 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
161 return false;
316} 162}
317 163
318void RendererVulkan::ShutDown() { 164void RendererVulkan::ShutDown() {
@@ -322,7 +168,6 @@ void RendererVulkan::ShutDown() {
322 if (const auto& dev = device->GetLogical()) { 168 if (const auto& dev = device->GetLogical()) {
323 dev.WaitIdle(); 169 dev.WaitIdle();
324 } 170 }
325
326 rasterizer.reset(); 171 rasterizer.reset();
327 blit_screen.reset(); 172 blit_screen.reset();
328 scheduler.reset(); 173 scheduler.reset();
@@ -331,95 +176,15 @@ void RendererVulkan::ShutDown() {
331 device.reset(); 176 device.reset();
332} 177}
333 178
334bool RendererVulkan::CreateDebugCallback() { 179void RendererVulkan::InitializeDevice() {
335 if (!Settings::values.renderer_debug) { 180 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
336 return true;
337 }
338 debug_callback = instance.TryCreateDebugCallback(DebugCallback);
339 if (!debug_callback) {
340 LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
341 return false;
342 }
343 return true;
344}
345
346bool RendererVulkan::CreateSurface() {
347 [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
348 VkSurfaceKHR unsafe_surface = nullptr;
349
350#ifdef _WIN32
351 if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
352 const HWND hWnd = static_cast<HWND>(window_info.render_surface);
353 const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
354 nullptr, 0, nullptr, hWnd};
355 const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
356 dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
357 if (!vkCreateWin32SurfaceKHR ||
358 vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
359 LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
360 return false;
361 }
362 }
363#endif
364#if !defined(_WIN32) && !defined(__APPLE__)
365 if (window_info.type == Core::Frontend::WindowSystemType::X11) {
366 const VkXlibSurfaceCreateInfoKHR xlib_ci{
367 VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
368 static_cast<Display*>(window_info.display_connection),
369 reinterpret_cast<Window>(window_info.render_surface)};
370 const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
371 dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
372 if (!vkCreateXlibSurfaceKHR ||
373 vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
374 LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
375 return false;
376 }
377 }
378 if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
379 const VkWaylandSurfaceCreateInfoKHR wayland_ci{
380 VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
381 static_cast<wl_display*>(window_info.display_connection),
382 static_cast<wl_surface*>(window_info.render_surface)};
383 const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
384 dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
385 if (!vkCreateWaylandSurfaceKHR ||
386 vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
387 VK_SUCCESS) {
388 LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
389 return false;
390 }
391 }
392#endif
393 if (!unsafe_surface) {
394 LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
395 return false;
396 }
397
398 surface = vk::SurfaceKHR(unsafe_surface, *instance, dld);
399 return true;
400}
401
402bool RendererVulkan::PickDevices() {
403 const auto devices = instance.EnumeratePhysicalDevices();
404 if (!devices) {
405 LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices");
406 return false;
407 }
408
409 const s32 device_index = Settings::values.vulkan_device.GetValue(); 181 const s32 device_index = Settings::values.vulkan_device.GetValue();
410 if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) { 182 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) {
411 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); 183 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
412 return false; 184 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
413 }
414 const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)],
415 dld);
416 if (!VKDevice::IsSuitable(physical_device, *surface)) {
417 return false;
418 } 185 }
419 186 const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld);
420 device = 187 device = std::make_unique<Device>(*instance, physical_device, *surface, dld);
421 std::make_unique<VKDevice>(*instance, instance_version, physical_device, *surface, dld);
422 return device->Create();
423} 188}
424 189
425void RendererVulkan::Report() const { 190void RendererVulkan::Report() const {
@@ -444,25 +209,21 @@ void RendererVulkan::Report() const {
444 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); 209 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
445} 210}
446 211
447std::vector<std::string> RendererVulkan::EnumerateDevices() { 212std::vector<std::string> RendererVulkan::EnumerateDevices() try {
448 vk::InstanceDispatch dld; 213 vk::InstanceDispatch dld;
449 Common::DynamicLibrary library = OpenVulkanLibrary(); 214 const Common::DynamicLibrary library = OpenLibrary();
450 vk::Instance instance = CreateInstance(library, dld).first; 215 const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0);
451 if (!instance) { 216 const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices();
452 return {};
453 }
454
455 const std::optional physical_devices = instance.EnumeratePhysicalDevices();
456 if (!physical_devices) {
457 return {};
458 }
459
460 std::vector<std::string> names; 217 std::vector<std::string> names;
461 names.reserve(physical_devices->size()); 218 names.reserve(physical_devices.size());
462 for (const auto& device : *physical_devices) { 219 for (const VkPhysicalDevice device : physical_devices) {
463 names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); 220 names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
464 } 221 }
465 return names; 222 return names;
223
224} catch (const vk::Exception& exception) {
225 LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what());
226 return {};
466} 227}
467 228
468} // namespace Vulkan 229} // namespace Vulkan
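
Note: Init and EnumerateDevices above use C++ function-try-blocks; the try wraps the entire function body, so any vk::Exception thrown by the vulkan_common helpers collapses into a single log-and-return path instead of per-call error checks. A self-contained sketch of the shape, with std::runtime_error standing in for vk::Exception:

    #include <cstdio>
    #include <stdexcept>

    bool Initialize(bool fail) try {
        if (fail) {
            // Any helper along the way can simply throw on error...
            throw std::runtime_error("VK_ERROR_INITIALIZATION_FAILED");
        }
        return true;
    } catch (const std::exception& exception) {
        // ...and one catch turns every failure into a boolean result.
        std::printf("Vulkan initialization failed with error: %s\n", exception.what());
        return false;
    }

    int main() {
        std::printf("ok=%d\n", int{Initialize(false)});
        std::printf("ok=%d\n", int{Initialize(true)});
    }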
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 977b86003..5575ffc54 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -11,7 +11,7 @@
11#include "common/dynamic_library.h" 11#include "common/dynamic_library.h"
12 12
13#include "video_core/renderer_base.h" 13#include "video_core/renderer_base.h"
14#include "video_core/renderer_vulkan/wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
15 15
16namespace Core { 16namespace Core {
17class TelemetrySession; 17class TelemetrySession;
@@ -27,16 +27,15 @@ class GPU;
27 27
28namespace Vulkan { 28namespace Vulkan {
29 29
30class Device;
30class StateTracker; 31class StateTracker;
31class VKBlitScreen; 32class VKBlitScreen;
32class VKDevice;
33class VKMemoryManager; 33class VKMemoryManager;
34class VKSwapchain; 34class VKSwapchain;
35class VKScheduler; 35class VKScheduler;
36class VKImage;
37 36
38struct VKScreenInfo { 37struct VKScreenInfo {
39 VKImage* image{}; 38 VkImageView image_view{};
40 u32 width{}; 39 u32 width{};
41 u32 height{}; 40 u32 height{};
42 bool is_srgb{}; 41 bool is_srgb{};
@@ -57,11 +56,7 @@ public:
57 static std::vector<std::string> EnumerateDevices(); 56 static std::vector<std::string> EnumerateDevices();
58 57
59private: 58private:
60 bool CreateDebugCallback(); 59 void InitializeDevice();
61
62 bool CreateSurface();
63
64 bool PickDevices();
65 60
66 void Report() const; 61 void Report() const;
67 62
@@ -73,14 +68,13 @@ private:
73 vk::InstanceDispatch dld; 68 vk::InstanceDispatch dld;
74 69
75 vk::Instance instance; 70 vk::Instance instance;
76 u32 instance_version{};
77 71
78 vk::SurfaceKHR surface; 72 vk::SurfaceKHR surface;
79 73
80 VKScreenInfo screen_info; 74 VKScreenInfo screen_info;
81 75
82 vk::DebugCallback debug_callback; 76 vk::DebugUtilsMessenger debug_callback;
83 std::unique_ptr<VKDevice> device; 77 std::unique_ptr<Device> device;
84 std::unique_ptr<VKMemoryManager> memory_manager; 78 std::unique_ptr<VKMemoryManager> memory_manager;
85 std::unique_ptr<StateTracker> state_tracker; 79 std::unique_ptr<StateTracker> state_tracker;
86 std::unique_ptr<VKScheduler> scheduler; 80 std::unique_ptr<VKScheduler> scheduler;
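
Note: the library, dld, and instance members reflect Vulkan's bootstrap order: open the loader, resolve vkGetInstanceProcAddr, create the instance, then load instance-level entry points. A reduced sketch that links the loader directly rather than resolving it through Common::DynamicLibrary (an assumption made here for brevity):

    #include <cstdio>
    #include <vulkan/vulkan.h>

    int main() {
        const VkApplicationInfo app_info{
            .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
            .apiVersion = VK_API_VERSION_1_1, // capped to a tested version, as in CreateInstance
        };
        const VkInstanceCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
            .pApplicationInfo = &app_info,
        };
        VkInstance instance{};
        if (vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) {
            std::printf("failed to create Vulkan instance\n");
            return 1;
        }
        std::printf("instance created\n");
        vkDestroyInstance(instance, nullptr);
        return 0;
    }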
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index b5b60309e..5e184eb42 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -16,121 +16,25 @@
16#include "core/frontend/emu_window.h" 16#include "core/frontend/emu_window.h"
17#include "core/memory.h" 17#include "core/memory.h"
18#include "video_core/gpu.h" 18#include "video_core/gpu.h"
19#include "video_core/morton.h" 19#include "video_core/host_shaders/vulkan_present_frag_spv.h"
20#include "video_core/host_shaders/vulkan_present_vert_spv.h"
20#include "video_core/rasterizer_interface.h" 21#include "video_core/rasterizer_interface.h"
21#include "video_core/renderer_vulkan/renderer_vulkan.h" 22#include "video_core/renderer_vulkan/renderer_vulkan.h"
22#include "video_core/renderer_vulkan/vk_blit_screen.h" 23#include "video_core/renderer_vulkan/vk_blit_screen.h"
23#include "video_core/renderer_vulkan/vk_device.h"
24#include "video_core/renderer_vulkan/vk_image.h"
25#include "video_core/renderer_vulkan/vk_master_semaphore.h" 24#include "video_core/renderer_vulkan/vk_master_semaphore.h"
26#include "video_core/renderer_vulkan/vk_memory_manager.h" 25#include "video_core/renderer_vulkan/vk_memory_manager.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 26#include "video_core/renderer_vulkan/vk_scheduler.h"
28#include "video_core/renderer_vulkan/vk_shader_util.h" 27#include "video_core/renderer_vulkan/vk_shader_util.h"
29#include "video_core/renderer_vulkan/vk_swapchain.h" 28#include "video_core/renderer_vulkan/vk_swapchain.h"
30#include "video_core/renderer_vulkan/wrapper.h"
31#include "video_core/surface.h" 29#include "video_core/surface.h"
30#include "video_core/textures/decoders.h"
31#include "video_core/vulkan_common/vulkan_device.h"
32#include "video_core/vulkan_common/vulkan_wrapper.h"
32 33
33namespace Vulkan { 34namespace Vulkan {
34 35
35namespace { 36namespace {
36 37
37// Generated from the "shaders/" directory, read the instructions there.
38constexpr u8 blit_vertex_code[] = {
39 [... lines 39-99: raw SPIR-V words of the blit vertex shader, elided ...]
100 0x38, 0x00, 0x01, 0x00};
101
102constexpr u8 blit_fragment_code[] = {
103 [... lines 103-131: raw SPIR-V words of the blit fragment shader, elided ...]
132 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
133
134struct ScreenRectVertex { 38struct ScreenRectVertex {
135 ScreenRectVertex() = default; 39 ScreenRectVertex() = default;
136 explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} 40 explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
@@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
173 // clang-format on 77 // clang-format on
174} 78}
175 79
176std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { 80u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
177 using namespace VideoCore::Surface; 81 using namespace VideoCore::Surface;
178 return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); 82 return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
179} 83}
180 84
181std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { 85std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
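GetBytesPerPixel now returns a u32 computed through BytesPerBlock, and GetSizeInBytes builds on it. A minimal sketch of the arithmetic with invented names (the real function also derives its values from the framebuffer config, and the stride is assumed to be in pixels here):

    #include <cstddef>
    #include <cstdint>

    // Bytes occupied by a linear framebuffer: rows times row pitch in bytes.
    std::size_t LinearFramebufferSizeBytes(std::uint32_t stride_pixels, std::uint32_t height,
                                           std::uint32_t bytes_per_pixel) {
        return std::size_t{stride_pixels} * bytes_per_pixel * height;
    }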
@@ -210,7 +114,7 @@ struct VKBlitScreen::BufferData {
210 114
211VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, 115VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
212 Core::Frontend::EmuWindow& render_window_, 116 Core::Frontend::EmuWindow& render_window_,
213 VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_, 117 VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
214 VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, 118 VKMemoryManager& memory_manager_, VKSwapchain& swapchain_,
215 VKScheduler& scheduler_, const VKScreenInfo& screen_info_) 119 VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
216 : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, 120 : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
@@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
239 scheduler.Wait(resource_ticks[image_index]); 143 scheduler.Wait(resource_ticks[image_index]);
240 resource_ticks[image_index] = scheduler.CurrentTick(); 144 resource_ticks[image_index] = scheduler.CurrentTick();
241 145
242 VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get(); 146 UpdateDescriptorSet(image_index,
243 147 use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
244 UpdateDescriptorSet(image_index, blit_image->GetPresentView());
245 148
246 BufferData data; 149 BufferData data;
247 SetUniformData(data, framebuffer); 150 SetUniformData(data, framebuffer);
248 SetVertexData(data, framebuffer); 151 SetVertexData(data, framebuffer);
249 152
250 auto map = buffer_commit->Map(); 153 auto map = buffer_commit->Map();
251 std::memcpy(map.GetAddress(), &data, sizeof(data)); 154 std::memcpy(map.Address(), &data, sizeof(data));
252 155
253 if (!use_accelerated) { 156 if (!use_accelerated) {
254 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 157 const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
255 158
256 const auto pixel_format =
257 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
258 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 159 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
259 const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); 160 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
260 rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); 161 const size_t size_bytes = GetSizeInBytes(framebuffer);
162 rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
261 163
262 // TODO(Rodrigo): Read this from HLE 164 // TODO(Rodrigo): Read this from HLE
263 constexpr u32 block_height_log2 = 4; 165 constexpr u32 block_height_log2 = 4;
264 VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, 166 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
265 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, 167 Tegra::Texture::UnswizzleTexture(
266 map.GetAddress() + image_offset, host_ptr); 168 std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes),
267 169 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
268 blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
269 VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
270 170
271 const VkBufferImageCopy copy{ 171 const VkBufferImageCopy copy{
272 .bufferOffset = image_offset, 172 .bufferOffset = image_offset,
@@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
288 }, 188 },
289 }; 189 };
290 scheduler.Record( 190 scheduler.Record(
291 [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { 191 [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
292 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); 192 const VkImageMemoryBarrier base_barrier{
193 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
194 .pNext = nullptr,
195 .srcAccessMask = 0,
196 .dstAccessMask = 0,
197 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
198 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
199 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
200 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
201 .image = image,
202 .subresourceRange =
203 {
204 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
205 .baseMipLevel = 0,
206 .levelCount = 1,
207 .baseArrayLayer = 0,
208 .layerCount = 1,
209 },
210 };
211 VkImageMemoryBarrier read_barrier = base_barrier;
212 read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
213 read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
214 read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
215
216 VkImageMemoryBarrier write_barrier = base_barrier;
217 write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
218 write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
219
220 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
221 0, read_barrier);
222 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
223 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
224 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
293 }); 225 });
294 } 226 }
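The explicit barrier pair recorded above replaces the old VKImage::Transition tracking: the image now stays in VK_IMAGE_LAYOUT_GENERAL, so only access-mask synchronization remains. A standalone sketch of that pattern against the raw Vulkan C API (names are local to this example, not yuzu's wrapper):

    #include <vulkan/vulkan.h>

    // Host write -> transfer copy -> fragment-shader read, all in GENERAL layout.
    void UploadLinearImage(VkCommandBuffer cmdbuf, VkBuffer staging, VkImage image,
                           const VkBufferImageCopy& copy) {
        VkImageMemoryBarrier barrier{};
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = image;
        barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};

        // Make the CPU write visible to the copy; UNDEFINED discards old contents.
        barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_HOST_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr,
                             1, &barrier);

        vkCmdCopyBufferToImage(cmdbuf, staging, image, VK_IMAGE_LAYOUT_GENERAL, 1, &copy);

        // Make the copy visible to sampling in the fragment shader.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr,
                             1, &barrier);
    }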
295 map.Release(); 227 map.Release();
296 228
297 blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
298 VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
299
300 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], 229 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
301 descriptor_set = descriptor_sets[image_index], buffer = *buffer, 230 descriptor_set = descriptor_sets[image_index], buffer = *buffer,
302 size = swapchain.GetSize(), pipeline = *pipeline, 231 size = swapchain.GetSize(), pipeline = *pipeline,
@@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
304 const VkClearValue clear_color{ 233 const VkClearValue clear_color{
305 .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}}, 234 .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
306 }; 235 };
307 236 const VkRenderPassBeginInfo renderpass_bi{
308 VkRenderPassBeginInfo renderpass_bi; 237 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
309 renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; 238 .pNext = nullptr,
310 renderpass_bi.pNext = nullptr; 239 .renderPass = renderpass,
311 renderpass_bi.renderPass = renderpass; 240 .framebuffer = framebuffer,
312 renderpass_bi.framebuffer = framebuffer; 241 .renderArea =
313 renderpass_bi.renderArea.offset.x = 0; 242 {
314 renderpass_bi.renderArea.offset.y = 0; 243 .offset = {0, 0},
315 renderpass_bi.renderArea.extent = size; 244 .extent = size,
316 renderpass_bi.clearValueCount = 1; 245 },
317 renderpass_bi.pClearValues = &clear_color; 246 .clearValueCount = 1,
318 247 .pClearValues = &clear_color,
319 VkViewport viewport; 248 };
320 viewport.x = 0.0f; 249 const VkViewport viewport{
321 viewport.y = 0.0f; 250 .x = 0.0f,
322 viewport.width = static_cast<float>(size.width); 251 .y = 0.0f,
323 viewport.height = static_cast<float>(size.height); 252 .width = static_cast<float>(size.width),
324 viewport.minDepth = 0.0f; 253 .height = static_cast<float>(size.height),
325 viewport.maxDepth = 1.0f; 254 .minDepth = 0.0f,
326 255 .maxDepth = 1.0f,
327 VkRect2D scissor; 256 };
328 scissor.offset.x = 0; 257 const VkRect2D scissor{
329 scissor.offset.y = 0; 258 .offset = {0, 0},
330 scissor.extent = size; 259 .extent = size,
331 260 };
332 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 261 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
333 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); 262 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
334 cmdbuf.SetViewport(0, viewport); 263 cmdbuf.SetViewport(0, viewport);
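The conversion above from declare-then-assign to C++20 designated initializers is more than cosmetic: any member not named in an aggregate initializer is value-initialized, whereas the old uninitialized VkRenderPassBeginInfo left a forgotten field indeterminate. A small illustration of the safer form:

    #include <vulkan/vulkan.h>

    VkRect2D MakeScissor(VkExtent2D size) {
        // Every field is either spelled out or zeroed; nothing is left uninitialized.
        return VkRect2D{
            .offset = {0, 0},
            .extent = size,
        };
    }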
@@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
372} 301}
373 302
374void VKBlitScreen::CreateShaders() { 303void VKBlitScreen::CreateShaders() {
375 vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); 304 vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
376 fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); 305 fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
377} 306}
378 307
379void VKBlitScreen::CreateSemaphores() { 308void VKBlitScreen::CreateSemaphores() {
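CreateShaders now consumes symbols such as VULKAN_PRESENT_VERT_SPV that the build generates from the shader sources, replacing the hand-pasted hex arrays deleted above. A sketch of the pattern with an invented array name (the exact shape of the generated header is an assumption here):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    // What a generated header conceptually provides: the module as u32 words.
    constexpr std::uint32_t EXAMPLE_PRESENT_VERT_SPV[] = {
        0x07230203u, // SPIR-V magic number; remaining words emitted at build time
    };

    VkShaderModule BuildExampleShader(VkDevice device) {
        const VkShaderModuleCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .codeSize = sizeof(EXAMPLE_PRESENT_VERT_SPV), // size in bytes
            .pCode = EXAMPLE_PRESENT_VERT_SPV,
        };
        VkShaderModule module = VK_NULL_HANDLE;
        vkCreateShaderModule(device, &ci, nullptr, &module);
        return module;
    }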
@@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() {
420 349
421 const VkAttachmentReference color_attachment_ref{ 350 const VkAttachmentReference color_attachment_ref{
422 .attachment = 0, 351 .attachment = 0,
423 .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 352 .layout = VK_IMAGE_LAYOUT_GENERAL,
424 }; 353 };
425 354
426 const VkSubpassDescription subpass_description{ 355 const VkSubpassDescription subpass_description{
@@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
735 664
736void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { 665void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
737 raw_images.resize(image_count); 666 raw_images.resize(image_count);
667 raw_image_views.resize(image_count);
738 raw_buffer_commits.resize(image_count); 668 raw_buffer_commits.resize(image_count);
739 669
740 const VkImageCreateInfo ci{ 670 for (size_t i = 0; i < image_count; ++i) {
741 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 671 raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
742 .pNext = nullptr, 672 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
743 .flags = 0, 673 .pNext = nullptr,
744 .imageType = VK_IMAGE_TYPE_2D, 674 .flags = 0,
745 .format = GetFormat(framebuffer), 675 .imageType = VK_IMAGE_TYPE_2D,
746 .extent = 676 .format = GetFormat(framebuffer),
747 { 677 .extent =
748 .width = framebuffer.width, 678 {
749 .height = framebuffer.height, 679 .width = framebuffer.width,
750 .depth = 1, 680 .height = framebuffer.height,
751 }, 681 .depth = 1,
752 .mipLevels = 1, 682 },
753 .arrayLayers = 1, 683 .mipLevels = 1,
754 .samples = VK_SAMPLE_COUNT_1_BIT, 684 .arrayLayers = 1,
755 .tiling = VK_IMAGE_TILING_LINEAR, 685 .samples = VK_SAMPLE_COUNT_1_BIT,
756 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 686 .tiling = VK_IMAGE_TILING_LINEAR,
757 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 687 .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
758 .queueFamilyIndexCount = 0, 688 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
759 .pQueueFamilyIndices = nullptr, 689 .queueFamilyIndexCount = 0,
760 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 690 .pQueueFamilyIndices = nullptr,
761 }; 691 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
762 692 });
763 for (std::size_t i = 0; i < image_count; ++i) { 693 raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false);
764 raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); 694 raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
765 raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); 695 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
696 .pNext = nullptr,
697 .flags = 0,
698 .image = *raw_images[i],
699 .viewType = VK_IMAGE_VIEW_TYPE_2D,
700 .format = GetFormat(framebuffer),
701 .components =
702 {
703 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
704 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
705 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
706 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
707 },
708 .subresourceRange =
709 {
710 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
711 .baseMipLevel = 0,
712 .levelCount = 1,
713 .baseArrayLayer = 0,
714 .layerCount = 1,
715 },
716 });
766 } 717 }
767} 718}
768 719
@@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
789 const VkDescriptorImageInfo image_info{ 740 const VkDescriptorImageInfo image_info{
790 .sampler = *sampler, 741 .sampler = *sampler,
791 .imageView = image_view, 742 .imageView = image_view,
792 .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 743 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
793 }; 744 };
794 745
795 const VkWriteDescriptorSet sampler_write{ 746 const VkWriteDescriptorSet sampler_write{
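The descriptor update now advertises VK_IMAGE_LAYOUT_GENERAL, matching the barrier scheme earlier in the file. A minimal sketch of a combined image sampler write (names local to the example):

    #include <vulkan/vulkan.h>

    void WriteSampledImage(VkDevice device, VkDescriptorSet set, VkSampler sampler,
                           VkImageView view) {
        const VkDescriptorImageInfo image_info{
            .sampler = sampler,
            .imageView = view,
            .imageLayout = VK_IMAGE_LAYOUT_GENERAL, // layout the image actually lives in
        };
        const VkWriteDescriptorSet write{
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .pNext = nullptr,
            .dstSet = set,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .pImageInfo = &image_info,
            .pBufferInfo = nullptr,
            .pTexelBufferView = nullptr,
        };
        vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
    }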
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 8f2839214..69ed61770 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -7,7 +7,7 @@
7#include <memory> 7#include <memory>
8 8
9#include "video_core/renderer_vulkan/vk_memory_manager.h" 9#include "video_core/renderer_vulkan/vk_memory_manager.h"
10#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
11 11
12namespace Core { 12namespace Core {
13class System; 13class System;
@@ -33,9 +33,8 @@ namespace Vulkan {
33 33
34struct ScreenInfo; 34struct ScreenInfo;
35 35
36class Device;
36class RasterizerVulkan; 37class RasterizerVulkan;
37class VKDevice;
38class VKImage;
39class VKScheduler; 38class VKScheduler;
40class VKSwapchain; 39class VKSwapchain;
41 40
@@ -43,7 +42,7 @@ class VKBlitScreen final {
43public: 42public:
44 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, 43 explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
45 Core::Frontend::EmuWindow& render_window, 44 Core::Frontend::EmuWindow& render_window,
46 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, 45 VideoCore::RasterizerInterface& rasterizer, const Device& device,
47 VKMemoryManager& memory_manager, VKSwapchain& swapchain, 46 VKMemoryManager& memory_manager, VKSwapchain& swapchain,
48 VKScheduler& scheduler, const VKScreenInfo& screen_info); 47 VKScheduler& scheduler, const VKScreenInfo& screen_info);
49 ~VKBlitScreen(); 48 ~VKBlitScreen();
@@ -86,7 +85,7 @@ private:
86 Core::Memory::Memory& cpu_memory; 85 Core::Memory::Memory& cpu_memory;
87 Core::Frontend::EmuWindow& render_window; 86 Core::Frontend::EmuWindow& render_window;
88 VideoCore::RasterizerInterface& rasterizer; 87 VideoCore::RasterizerInterface& rasterizer;
89 const VKDevice& device; 88 const Device& device;
90 VKMemoryManager& memory_manager; 89 VKMemoryManager& memory_manager;
91 VKSwapchain& swapchain; 90 VKSwapchain& swapchain;
92 VKScheduler& scheduler; 91 VKScheduler& scheduler;
@@ -110,7 +109,8 @@ private:
110 std::vector<u64> resource_ticks; 109 std::vector<u64> resource_ticks;
111 110
112 std::vector<vk::Semaphore> semaphores; 111 std::vector<vk::Semaphore> semaphores;
113 std::vector<std::unique_ptr<VKImage>> raw_images; 112 std::vector<vk::Image> raw_images;
113 std::vector<vk::ImageView> raw_image_views;
114 std::vector<VKMemoryCommit> raw_buffer_commits; 114 std::vector<VKMemoryCommit> raw_buffer_commits;
115 u32 raw_width = 0; 115 u32 raw_width = 0;
116 u32 raw_height = 0; 116 u32 raw_height = 0;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 444d3fb93..4d517c547 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -9,10 +9,10 @@
9#include "core/core.h" 9#include "core/core.h"
10#include "video_core/buffer_cache/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h" 11#include "video_core/renderer_vulkan/vk_buffer_cache.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_scheduler.h" 12#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/vk_stream_buffer.h" 13#include "video_core/renderer_vulkan/vk_stream_buffer.h"
15#include "video_core/renderer_vulkan/wrapper.h" 14#include "video_core/vulkan_common/vulkan_device.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h"
16 16
17namespace Vulkan { 17namespace Vulkan {
18 18
@@ -31,15 +31,19 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | 31 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; 32 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
33 33
34std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { 34constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
35 return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); 35 VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
36
37std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const Device& device, VKScheduler& scheduler) {
38 return std::make_unique<VKStreamBuffer>(device, scheduler);
36} 39}
37 40
38} // Anonymous namespace 41} // Anonymous namespace
39 42
40Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, 43Buffer::Buffer(const Device& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
41 VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) 44 VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
42 : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} { 45 : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
46 staging_pool_} {
43 const VkBufferCreateInfo ci{ 47 const VkBufferCreateInfo ci{
44 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 48 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
45 .pNext = nullptr, 49 .pNext = nullptr,
@@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
64 scheduler.RequestOutsideRenderPassOperationContext(); 68 scheduler.RequestOutsideRenderPassOperationContext();
65 69
66 const VkBuffer handle = Handle(); 70 const VkBuffer handle = Handle();
67 scheduler.Record( 71 scheduler.Record([staging = *staging.handle, handle, offset, data_size,
68 [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) { 72 &device = device](vk::CommandBuffer cmdbuf) {
69 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); 73 const VkBufferMemoryBarrier read_barrier{
70 74 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
71 const VkBufferMemoryBarrier barrier{ 75 .pNext = nullptr,
72 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 76 .srcAccessMask =
73 .pNext = nullptr, 77 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
74 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 78 VK_ACCESS_HOST_WRITE_BIT |
75 .dstAccessMask = UPLOAD_ACCESS_BARRIERS, 79 (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
76 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 80 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
77 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 81 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
78 .buffer = handle, 82 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
79 .offset = offset, 83 .buffer = handle,
80 .size = data_size, 84 .offset = offset,
81 }; 85 .size = data_size,
82 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, 86 };
83 barrier, {}); 87 const VkBufferMemoryBarrier write_barrier{
84 }); 88 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
89 .pNext = nullptr,
90 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
91 .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
92 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
93 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
94 .buffer = handle,
95 .offset = offset,
96 .size = data_size,
97 };
98 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
99 0, read_barrier);
100 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
101 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
102 write_barrier);
103 });
85} 104}
86 105
87void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { 106void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
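Buffer::Upload now brackets the copy with two barriers instead of one trailing barrier: the first waits on every prior writer of the range (including transform-feedback writes when VK_EXT_transform_feedback is available), the second publishes the copy to the upload consumers. A standalone sketch of the ordering against the raw Vulkan C API; the consumer masks are condensed here and are not yuzu's exact UPLOAD_* constants:

    #include <vulkan/vulkan.h>

    void UploadWithBarriers(VkCommandBuffer cmdbuf, VkBuffer staging, VkBuffer dest,
                            VkDeviceSize offset, VkDeviceSize size, bool has_xfb) {
        VkBufferMemoryBarrier barrier{};
        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.buffer = dest;
        barrier.offset = offset;
        barrier.size = size;

        // 1) Wait for earlier writers before the transfer overwrites the range.
        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
                                VK_ACCESS_HOST_WRITE_BIT;
        if (has_xfb) {
            barrier.srcAccessMask |= VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
                                     VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
        }
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &barrier,
                             0, nullptr);

        // 2) The copy itself.
        const VkBufferCopy region{.srcOffset = 0, .dstOffset = offset, .size = size};
        vkCmdCopyBuffer(cmdbuf, staging, dest, 1, &region);

        // 3) Publish the new contents to the stages that will read the buffer.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
                                VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1, &barrier,
                             0, nullptr);
    }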
@@ -149,9 +168,11 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
149 168
150VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, 169VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
151 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 170 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
152 const VKDevice& device_, VKMemoryManager& memory_manager_, 171 const Device& device_, VKMemoryManager& memory_manager_,
153 VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) 172 VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
154 : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)}, 173 VKStagingBufferPool& staging_pool_)
174 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
175 cpu_memory_, stream_buffer_},
155 device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ 176 device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
156 staging_pool_} {} 177 staging_pool_} {}
157 178
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 6008b8373..1c39aed34 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -11,17 +11,17 @@
11#include "video_core/renderer_vulkan/vk_memory_manager.h" 11#include "video_core/renderer_vulkan/vk_memory_manager.h"
12#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 12#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
13#include "video_core/renderer_vulkan/vk_stream_buffer.h" 13#include "video_core/renderer_vulkan/vk_stream_buffer.h"
14#include "video_core/renderer_vulkan/wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
15 15
16namespace Vulkan { 16namespace Vulkan {
17 17
18class VKDevice; 18class Device;
19class VKMemoryManager; 19class VKMemoryManager;
20class VKScheduler; 20class VKScheduler;
21 21
22class Buffer final : public VideoCommon::BufferBlock { 22class Buffer final : public VideoCommon::BufferBlock {
23public: 23public:
24 explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, 24 explicit Buffer(const Device& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
25 VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); 25 VKStagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_);
26 ~Buffer(); 26 ~Buffer();
27 27
@@ -41,6 +41,7 @@ public:
41 } 41 }
42 42
43private: 43private:
44 const Device& device;
44 VKScheduler& scheduler; 45 VKScheduler& scheduler;
45 VKStagingBufferPool& staging_pool; 46 VKStagingBufferPool& staging_pool;
46 47
@@ -49,10 +50,11 @@ private:
49 50
50class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { 51class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
51public: 52public:
52 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, 53 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
53 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 54 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
54 const VKDevice& device_, VKMemoryManager& memory_manager_, 55 const Device& device, VKMemoryManager& memory_manager,
55 VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_); 56 VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
57 VKStagingBufferPool& staging_pool);
56 ~VKBufferCache(); 58 ~VKBufferCache();
57 59
58 BufferInfo GetEmptyBuffer(std::size_t size) override; 60 BufferInfo GetEmptyBuffer(std::size_t size) override;
@@ -61,7 +63,7 @@ protected:
61 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 63 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
62 64
63private: 65private:
64 const VKDevice& device; 66 const Device& device;
65 VKMemoryManager& memory_manager; 67 VKMemoryManager& memory_manager;
66 VKScheduler& scheduler; 68 VKScheduler& scheduler;
67 VKStagingBufferPool& staging_pool; 69 VKStagingBufferPool& staging_pool;
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
index 8f7d6410e..a99df9323 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -5,8 +5,8 @@
5#include <cstddef> 5#include <cstddef>
6 6
7#include "video_core/renderer_vulkan/vk_command_pool.h" 7#include "video_core/renderer_vulkan/vk_command_pool.h"
8#include "video_core/renderer_vulkan/vk_device.h" 8#include "video_core/vulkan_common/vulkan_device.h"
9#include "video_core/renderer_vulkan/wrapper.h" 9#include "video_core/vulkan_common/vulkan_wrapper.h"
10 10
11namespace Vulkan { 11namespace Vulkan {
12 12
@@ -17,7 +17,7 @@ struct CommandPool::Pool {
17 vk::CommandBuffers cmdbufs; 17 vk::CommandBuffers cmdbufs;
18}; 18};
19 19
20CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_) 20CommandPool::CommandPool(MasterSemaphore& master_semaphore_, const Device& device_)
21 : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {} 21 : ResourcePool(master_semaphore_, COMMAND_BUFFER_POOL_SIZE), device{device_} {}
22 22
23CommandPool::~CommandPool() = default; 23CommandPool::~CommandPool() = default;
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
index 62a7ce3f1..61c26a22a 100644
--- a/src/video_core/renderer_vulkan/vk_command_pool.h
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -8,16 +8,16 @@
8#include <vector> 8#include <vector>
9 9
10#include "video_core/renderer_vulkan/vk_resource_pool.h" 10#include "video_core/renderer_vulkan/vk_resource_pool.h"
11#include "video_core/renderer_vulkan/wrapper.h" 11#include "video_core/vulkan_common/vulkan_wrapper.h"
12 12
13namespace Vulkan { 13namespace Vulkan {
14 14
15class Device;
15class MasterSemaphore; 16class MasterSemaphore;
16class VKDevice;
17 17
18class CommandPool final : public ResourcePool { 18class CommandPool final : public ResourcePool {
19public: 19public:
20 explicit CommandPool(MasterSemaphore& master_semaphore_, const VKDevice& device_); 20 explicit CommandPool(MasterSemaphore& master_semaphore_, const Device& device_);
21 ~CommandPool() override; 21 ~CommandPool() override;
22 22
23 void Allocate(size_t begin, size_t end) override; 23 void Allocate(size_t begin, size_t end) override;
@@ -27,7 +27,7 @@ public:
27private: 27private:
28 struct Pool; 28 struct Pool;
29 29
30 const VKDevice& device; 30 const Device& device;
31 std::vector<Pool> pools; 31 std::vector<Pool> pools;
32}; 32};
33 33
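CommandPool is a ResourcePool that hands out command buffers in fixed-size chunks tied to the master semaphore's tick. An illustrative raw-Vulkan sketch of one chunk allocation (the helper name is an assumption, not the class's exact internals):

    #include <vector>
    #include <vulkan/vulkan.h>

    std::vector<VkCommandBuffer> AllocateChunk(VkDevice device, VkCommandPool pool,
                                               uint32_t count) {
        std::vector<VkCommandBuffer> cmdbufs(count);
        const VkCommandBufferAllocateInfo ai{
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
            .pNext = nullptr,
            .commandPool = pool,
            .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
            .commandBufferCount = count,
        };
        vkAllocateCommandBuffers(device, &ai, cmdbufs.data());
        return cmdbufs;
    }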
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 1ac7e2a30..02a6d54b7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,111 +10,21 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
14#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
15#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
13#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 17#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_scheduler.h" 18#include "video_core/renderer_vulkan/vk_scheduler.h"
17#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 19#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h" 20#include "video_core/renderer_vulkan/vk_update_descriptor.h"
19#include "video_core/renderer_vulkan/wrapper.h" 21#include "video_core/vulkan_common/vulkan_device.h"
22#include "video_core/vulkan_common/vulkan_wrapper.h"
20 23
21namespace Vulkan { 24namespace Vulkan {
22 25
23namespace { 26namespace {
24 27
25// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
26constexpr u8 quad_array[] = {
27 [... lines 27-115: raw SPIR-V words of the quad-array compute shader, elided ...]
116};
117
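The deleted quad_array module (now built from vulkan_quad_array_comp_spv.h) fills an index buffer that expands each quad into two triangles. A CPU-side equivalent, assuming the conventional {0, 1, 2, 0, 2, 3} per-quad expansion:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<std::uint32_t> MakeQuadArrayIndices(std::uint32_t num_quads,
                                                    std::uint32_t first_vertex) {
        static constexpr std::uint32_t pattern[6] = {0, 1, 2, 0, 2, 3};
        std::vector<std::uint32_t> indices;
        indices.reserve(std::size_t{num_quads} * 6);
        for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
            for (const std::uint32_t offset : pattern) {
                // Four vertices per quad become six indices per quad.
                indices.push_back(first_vertex + quad * 4 + offset);
            }
        }
        return indices;
    }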
118VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { 28VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
119 return { 29 return {
120 .binding = 0, 30 .binding = 0,
@@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
144 }; 54 };
145} 55}
146 56
147// Uint8 SPIR-V module. Generated from the "shaders/" directory.
148constexpr u8 uint8_pass[] = {
149 [... lines 149-222: raw SPIR-V words of the uint8 index-conversion shader, elided ...]
223};
224
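Likewise, the uint8 module (now vulkan_uint8_comp_spv.h) widens 8-bit index buffers to 16-bit on the GPU so they can be bound as VK_INDEX_TYPE_UINT16 when VK_EXT_index_type_uint8 is unavailable. The CPU-side equivalent is a plain widening copy:

    #include <cstdint>
    #include <vector>

    // Each u8 index becomes the identical u16 value; the compute pass does the
    // same conversion in parallel on the GPU.
    std::vector<std::uint16_t> WidenIndices(const std::vector<std::uint8_t>& input) {
        return std::vector<std::uint16_t>(input.begin(), input.end());
    }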
225// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
226constexpr u8 QUAD_INDEXED_SPV[] = {
227 [... lines 227 ff.: raw SPIR-V words of the quad-indexed compute shader, elided ...]
304 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
305 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
306 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
307 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
308 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
309 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
310 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
311 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
312 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
313 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
314 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
315 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
316 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
317 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
318 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
319 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
320 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
321 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
322 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
323 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
324 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
325 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
326 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
327 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
328 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
329 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
330 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
331 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
332 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
333 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
334 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
335 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
336 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
337 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
338 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
339 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
340 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
341 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
342 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
343 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
344 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
345 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
346 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
347};
348
349std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { 57std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
350 return {{ 58 return {{
351 { 59 {
@@ -378,11 +86,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
378 86
379} // Anonymous namespace 87} // Anonymous namespace
380 88
381VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, 89VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
382 vk::Span<VkDescriptorSetLayoutBinding> bindings, 90 vk::Span<VkDescriptorSetLayoutBinding> bindings,
383 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 91 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
384 vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, 92 vk::Span<VkPushConstantRange> push_constants,
385 const u8* code) { 93 std::span<const u32> code) {
386 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ 94 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
387 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 95 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
388 .pNext = nullptr, 96 .pNext = nullptr,
@@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
390 .bindingCount = bindings.size(), 98 .bindingCount = bindings.size(),
391 .pBindings = bindings.data(), 99 .pBindings = bindings.data(),
392 }); 100 });
393
394 layout = device.GetLogical().CreatePipelineLayout({ 101 layout = device.GetLogical().CreatePipelineLayout({
395 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 102 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
396 .pNext = nullptr, 103 .pNext = nullptr,
@@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
400 .pushConstantRangeCount = push_constants.size(), 107 .pushConstantRangeCount = push_constants.size(),
401 .pPushConstantRanges = push_constants.data(), 108 .pPushConstantRanges = push_constants.data(),
402 }); 109 });
403
404 if (!templates.empty()) { 110 if (!templates.empty()) {
405 descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ 111 descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
406 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 112 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
@@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
417 123
418 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); 124 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
419 } 125 }
420
421 auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
422 std::memcpy(code_copy.get(), code, code_size);
423
424 module = device.GetLogical().CreateShaderModule({ 126 module = device.GetLogical().CreateShaderModule({
425 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 127 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
426 .pNext = nullptr, 128 .pNext = nullptr,
427 .flags = 0, 129 .flags = 0,
428 .codeSize = code_size, 130 .codeSize = static_cast<u32>(code.size_bytes()),
429 .pCode = code_copy.get(), 131 .pCode = code.data(),
430 }); 132 });
431
432 pipeline = device.GetLogical().CreateComputePipeline({ 133 pipeline = device.GetLogical().CreateComputePipeline({
433 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 134 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
434 .pNext = nullptr, 135 .pNext = nullptr,
@@ -461,13 +162,13 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet(
461 return set; 162 return set;
462} 163}
463 164
464QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, 165QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
465 VKDescriptorPool& descriptor_pool_, 166 VKDescriptorPool& descriptor_pool_,
466 VKStagingBufferPool& staging_buffer_pool_, 167 VKStagingBufferPool& staging_buffer_pool_,
467 VKUpdateDescriptorQueue& update_descriptor_queue_) 168 VKUpdateDescriptorQueue& update_descriptor_queue_)
468 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), 169 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
469 BuildQuadArrayPassDescriptorUpdateTemplateEntry(), 170 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
470 BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), 171 BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
471 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 172 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
472 update_descriptor_queue{update_descriptor_queue_} {} 173 update_descriptor_queue{update_descriptor_queue_} {}
473 174
@@ -510,12 +211,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
510 return {*buffer.handle, 0}; 211 return {*buffer.handle, 0};
511} 212}
512 213
513Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, 214Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
514 VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, 215 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
515 VKUpdateDescriptorQueue& update_descriptor_queue_) 216 VKUpdateDescriptorQueue& update_descriptor_queue_)
516 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), 217 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
517 BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), 218 BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
518 uint8_pass),
519 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 219 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
520 update_descriptor_queue{update_descriptor_queue_} {} 220 update_descriptor_queue{update_descriptor_queue_} {}
521 221
@@ -555,14 +255,13 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
555 return {*buffer.handle, 0}; 255 return {*buffer.handle, 0};
556} 256}
557 257
558QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, 258QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
559 VKDescriptorPool& descriptor_pool_, 259 VKDescriptorPool& descriptor_pool_,
560 VKStagingBufferPool& staging_buffer_pool_, 260 VKStagingBufferPool& staging_buffer_pool_,
561 VKUpdateDescriptorQueue& update_descriptor_queue_) 261 VKUpdateDescriptorQueue& update_descriptor_queue_)
562 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), 262 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
563 BuildInputOutputDescriptorUpdateTemplate(), 263 BuildInputOutputDescriptorUpdateTemplate(),
564 BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), 264 BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
565 QUAD_INDEXED_SPV),
566 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 265 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
567 update_descriptor_queue{update_descriptor_queue_} {} 266 update_descriptor_queue{update_descriptor_queue_} {}
568 267
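The hunks above drop the "const u8* code" plus "code_size" interface and the interim copy with it. Vulkan requires VkShaderModuleCreateInfo::pCode to point at u32-aligned SPIR-V words, which a u8 array cannot guarantee, so the old path over-allocated a u32 buffer and memcpy'd into it; the VULKAN_*_COMP_SPV arrays (presumably build-generated) already store the words as u32, so a std::span<const u32> can feed pCode directly. A minimal sketch of the two shapes, under the assumption that FromBytes and FromSpan are illustrative names rather than functions from the tree:

#include <cstdint>
#include <cstring>
#include <memory>
#include <span>

#include <vulkan/vulkan.h>

// Old interface: u8 storage carries no 4-byte alignment guarantee, so the
// words had to be copied into freshly allocated u32 storage before use.
VkShaderModuleCreateInfo FromBytes(const std::uint8_t* code, std::size_t code_size,
                                   std::unique_ptr<std::uint32_t[]>& storage) {
    storage = std::make_unique<std::uint32_t[]>(code_size / sizeof(std::uint32_t) + 1);
    std::memcpy(storage.get(), code, code_size);
    return {
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = code_size,
        .pCode = storage.get(),
    };
}

// New interface: u32 storage is aligned by construction, so the span feeds
// pCode directly and both the copy and the separate size argument disappear.
VkShaderModuleCreateInfo FromSpan(std::span<const std::uint32_t> code) {
    return {
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = code.size_bytes(),
        .pCode = code.data(),
    };
}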
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 2dc87902c..7ddb09afb 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -5,27 +5,27 @@
5#pragma once 5#pragma once
6 6
7#include <optional> 7#include <optional>
8#include <span>
8#include <utility> 9#include <utility>
9 10
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
12#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 13#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
13#include "video_core/renderer_vulkan/wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
14 15
15namespace Vulkan { 16namespace Vulkan {
16 17
17class VKDevice; 18class Device;
18class VKScheduler; 19class VKScheduler;
19class VKStagingBufferPool; 20class VKStagingBufferPool;
20class VKUpdateDescriptorQueue; 21class VKUpdateDescriptorQueue;
21 22
22class VKComputePass { 23class VKComputePass {
23public: 24public:
24 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, 25 explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
25 vk::Span<VkDescriptorSetLayoutBinding> bindings, 26 vk::Span<VkDescriptorSetLayoutBinding> bindings,
26 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 27 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
27 vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, 28 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
28 const u8* code);
29 ~VKComputePass(); 29 ~VKComputePass();
30 30
31protected: 31protected:
@@ -43,7 +43,7 @@ private:
43 43
44class QuadArrayPass final : public VKComputePass { 44class QuadArrayPass final : public VKComputePass {
45public: 45public:
46 explicit QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_, 46 explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_,
47 VKDescriptorPool& descriptor_pool_, 47 VKDescriptorPool& descriptor_pool_,
48 VKStagingBufferPool& staging_buffer_pool_, 48 VKStagingBufferPool& staging_buffer_pool_,
49 VKUpdateDescriptorQueue& update_descriptor_queue_); 49 VKUpdateDescriptorQueue& update_descriptor_queue_);
@@ -59,7 +59,7 @@ private:
59 59
60class Uint8Pass final : public VKComputePass { 60class Uint8Pass final : public VKComputePass {
61public: 61public:
62 explicit Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, 62 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
63 VKDescriptorPool& descriptor_pool_, 63 VKDescriptorPool& descriptor_pool_,
64 VKStagingBufferPool& staging_buffer_pool_, 64 VKStagingBufferPool& staging_buffer_pool_,
65 VKUpdateDescriptorQueue& update_descriptor_queue_); 65 VKUpdateDescriptorQueue& update_descriptor_queue_);
@@ -75,7 +75,7 @@ private:
75 75
76class QuadIndexedPass final : public VKComputePass { 76class QuadIndexedPass final : public VKComputePass {
77public: 77public:
78 explicit QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler_, 78 explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
79 VKDescriptorPool& descriptor_pool_, 79 VKDescriptorPool& descriptor_pool_,
80 VKStagingBufferPool& staging_buffer_pool_, 80 VKStagingBufferPool& staging_buffer_pool_,
81 VKUpdateDescriptorQueue& update_descriptor_queue_); 81 VKUpdateDescriptorQueue& update_descriptor_queue_);
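With the header now taking std::span<const u32>, any contiguous u32 range binds without a separate size argument, which is why the std::size(...) calls vanish at the call sites above. A small sketch, using a hypothetical stand-in for one of the generated SPIR-V headers:

#include <cstdint>
#include <span>

// Hypothetical stand-in for a build-generated SPIR-V header; only the
// SPIR-V magic number is shown and the remaining words are elided.
constexpr std::uint32_t VULKAN_UINT8_COMP_SPV[] = {
    0x07230203,
};

void Consume(std::span<const std::uint32_t> code);

void Example() {
    // The span deduces both pointer and element count from the array,
    // so no std::size()/pointer pair is needed.
    Consume(VULKAN_UINT8_COMP_SPV);
}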
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 62f44d6da..3a48219b7 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -6,16 +6,16 @@
6 6
7#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 7#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 8#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 9#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h" 10#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 11#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
13#include "video_core/renderer_vulkan/vk_update_descriptor.h" 12#include "video_core/renderer_vulkan/vk_update_descriptor.h"
14#include "video_core/renderer_vulkan/wrapper.h" 13#include "video_core/vulkan_common/vulkan_device.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h"
15 15
16namespace Vulkan { 16namespace Vulkan {
17 17
18VKComputePipeline::VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, 18VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
19 VKDescriptorPool& descriptor_pool_, 19 VKDescriptorPool& descriptor_pool_,
20 VKUpdateDescriptorQueue& update_descriptor_queue_, 20 VKUpdateDescriptorQueue& update_descriptor_queue_,
21 const SPIRVShader& shader_) 21 const SPIRVShader& shader_)
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 49e2113a2..7e16575ac 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -7,17 +7,17 @@
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 8#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 9#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
10#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
11 11
12namespace Vulkan { 12namespace Vulkan {
13 13
14class VKDevice; 14class Device;
15class VKScheduler; 15class VKScheduler;
16class VKUpdateDescriptorQueue; 16class VKUpdateDescriptorQueue;
17 17
18class VKComputePipeline final { 18class VKComputePipeline final {
19public: 19public:
20 explicit VKComputePipeline(const VKDevice& device_, VKScheduler& scheduler_, 20 explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
21 VKDescriptorPool& descriptor_pool_, 21 VKDescriptorPool& descriptor_pool_,
22 VKUpdateDescriptorQueue& update_descriptor_queue_, 22 VKUpdateDescriptorQueue& update_descriptor_queue_,
23 const SPIRVShader& shader_); 23 const SPIRVShader& shader_);
@@ -48,7 +48,7 @@ private:
48 48
49 vk::Pipeline CreatePipeline() const; 49 vk::Pipeline CreatePipeline() const;
50 50
51 const VKDevice& device; 51 const Device& device;
52 VKScheduler& scheduler; 52 VKScheduler& scheduler;
53 ShaderEntries entries; 53 ShaderEntries entries;
54 54
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index f38e089d5..ef9fb5910 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -6,10 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 8#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_resource_pool.h" 9#include "video_core/renderer_vulkan/vk_resource_pool.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h" 10#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/wrapper.h" 11#include "video_core/vulkan_common/vulkan_device.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
@@ -32,7 +32,7 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
32 descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); 32 descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
33} 33}
34 34
35VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler) 35VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
36 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ 36 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
37 AllocateNewPool()} {} 37 AllocateNewPool()} {}
38 38
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index 544f32a20..f892be7be 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -7,11 +7,11 @@
7#include <vector> 7#include <vector>
8 8
9#include "video_core/renderer_vulkan/vk_resource_pool.h" 9#include "video_core/renderer_vulkan/vk_resource_pool.h"
10#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
11 11
12namespace Vulkan { 12namespace Vulkan {
13 13
14class VKDevice; 14class Device;
15class VKDescriptorPool; 15class VKDescriptorPool;
16class VKScheduler; 16class VKScheduler;
17 17
@@ -39,7 +39,7 @@ class VKDescriptorPool final {
39 friend DescriptorAllocator; 39 friend DescriptorAllocator;
40 40
41public: 41public:
42 explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler); 42 explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler);
43 ~VKDescriptorPool(); 43 ~VKDescriptorPool();
44 44
45 VKDescriptorPool(const VKDescriptorPool&) = delete; 45 VKDescriptorPool(const VKDescriptorPool&) = delete;
@@ -50,7 +50,7 @@ private:
50 50
51 vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); 51 vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
52 52
53 const VKDevice& device; 53 const Device& device;
54 MasterSemaphore& master_semaphore; 54 MasterSemaphore& master_semaphore;
55 55
56 std::vector<vk::DescriptorPool> pools; 56 std::vector<vk::DescriptorPool> pools;
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 0bcaee714..4c5bc0aa1 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -6,19 +6,19 @@
6#include <thread> 6#include <thread>
7 7
8#include "video_core/renderer_vulkan/vk_buffer_cache.h" 8#include "video_core/renderer_vulkan/vk_buffer_cache.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_fence_manager.h" 9#include "video_core/renderer_vulkan/vk_fence_manager.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h" 10#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/vk_texture_cache.h" 11#include "video_core/renderer_vulkan/vk_texture_cache.h"
13#include "video_core/renderer_vulkan/wrapper.h" 12#include "video_core/vulkan_common/vulkan_device.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h"
14 14
15namespace Vulkan { 15namespace Vulkan {
16 16
17InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, 17InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_,
18 bool is_stubbed_) 18 bool is_stubbed_)
19 : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} 19 : FenceBase{payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
20 20
21InnerFence::InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, 21InnerFence::InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_,
22 u32 payload_, bool is_stubbed_) 22 u32 payload_, bool is_stubbed_)
23 : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {} 23 : FenceBase{address_, payload_, is_stubbed_}, device{device_}, scheduler{scheduler_} {}
24 24
@@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const {
73} 73}
74 74
75VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 75VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
76 Tegra::MemoryManager& memory_manager_, 76 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
77 VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, 77 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
78 VKQueryCache& query_cache_, const VKDevice& device_, 78 const Device& device_, VKScheduler& scheduler_)
79 VKScheduler& scheduler_)
80 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, 79 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
81 device{device_}, scheduler{scheduler_} {} 80 device{device_}, scheduler{scheduler_} {}
82 81
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index c8547cc24..6b51e4587 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -8,7 +8,8 @@
8 8
9#include "video_core/fence_manager.h" 9#include "video_core/fence_manager.h"
10#include "video_core/renderer_vulkan/vk_buffer_cache.h" 10#include "video_core/renderer_vulkan/vk_buffer_cache.h"
11#include "video_core/renderer_vulkan/wrapper.h" 11#include "video_core/renderer_vulkan/vk_texture_cache.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
12 13
13namespace Core { 14namespace Core {
14class System; 15class System;
@@ -20,17 +21,16 @@ class RasterizerInterface;
20 21
21namespace Vulkan { 22namespace Vulkan {
22 23
24class Device;
23class VKBufferCache; 25class VKBufferCache;
24class VKDevice;
25class VKQueryCache; 26class VKQueryCache;
26class VKScheduler; 27class VKScheduler;
27class VKTextureCache;
28 28
29class InnerFence : public VideoCommon::FenceBase { 29class InnerFence : public VideoCommon::FenceBase {
30public: 30public:
31 explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, u32 payload_, 31 explicit InnerFence(const Device& device_, VKScheduler& scheduler_, u32 payload_,
32 bool is_stubbed_); 32 bool is_stubbed_);
33 explicit InnerFence(const VKDevice& device_, VKScheduler& scheduler_, GPUVAddr address_, 33 explicit InnerFence(const Device& device_, VKScheduler& scheduler_, GPUVAddr address_,
34 u32 payload_, bool is_stubbed_); 34 u32 payload_, bool is_stubbed_);
35 ~InnerFence(); 35 ~InnerFence();
36 36
@@ -43,7 +43,7 @@ public:
43private: 43private:
44 bool IsEventSignalled() const; 44 bool IsEventSignalled() const;
45 45
46 const VKDevice& device; 46 const Device& device;
47 VKScheduler& scheduler; 47 VKScheduler& scheduler;
48 vk::Event event; 48 vk::Event event;
49 u64 ticks = 0; 49 u64 ticks = 0;
@@ -51,14 +51,14 @@ private:
51using Fence = std::shared_ptr<InnerFence>; 51using Fence = std::shared_ptr<InnerFence>;
52 52
53using GenericFenceManager = 53using GenericFenceManager =
54 VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; 54 VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
55 55
56class VKFenceManager final : public GenericFenceManager { 56class VKFenceManager final : public GenericFenceManager {
57public: 57public:
58 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 58 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
59 Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, 59 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
60 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 60 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
61 const VKDevice& device_, VKScheduler& scheduler_); 61 const Device& device_, VKScheduler& scheduler_);
62 62
63protected: 63protected:
64 Fence CreateFence(u32 value, bool is_stubbed) override; 64 Fence CreateFence(u32 value, bool is_stubbed) override;
@@ -68,7 +68,7 @@ protected:
68 void WaitFence(Fence& fence) override; 68 void WaitFence(Fence& fence) override;
69 69
70private: 70private:
71 const VKDevice& device; 71 const Device& device;
72 VKScheduler& scheduler; 72 VKScheduler& scheduler;
73}; 73};
74 74
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 970979fa1..a5214d0bc 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -12,13 +12,12 @@
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 12#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h" 13#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 14#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
17#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 16#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
18#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/renderer_vulkan/vk_update_descriptor.h" 18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
21#include "video_core/renderer_vulkan/wrapper.h" 19#include "video_core/vulkan_common/vulkan_device.h"
20#include "video_core/vulkan_common/vulkan_wrapper.h"
22 21
23namespace Vulkan { 22namespace Vulkan {
24 23
@@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
69 }; 68 };
70} 69}
71 70
71VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
72 switch (msaa_mode) {
73 case Tegra::Texture::MsaaMode::Msaa1x1:
74 return VK_SAMPLE_COUNT_1_BIT;
75 case Tegra::Texture::MsaaMode::Msaa2x1:
76 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
77 return VK_SAMPLE_COUNT_2_BIT;
78 case Tegra::Texture::MsaaMode::Msaa2x2:
79 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
80 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
81 return VK_SAMPLE_COUNT_4_BIT;
82 case Tegra::Texture::MsaaMode::Msaa4x2:
83 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
84 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
85 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
86 return VK_SAMPLE_COUNT_8_BIT;
87 case Tegra::Texture::MsaaMode::Msaa4x4:
88 return VK_SAMPLE_COUNT_16_BIT;
89 default:
90 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
91 return VK_SAMPLE_COUNT_1_BIT;
92 }
93}
94
72} // Anonymous namespace 95} // Anonymous namespace
73 96
74VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, 97VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
75 VKDescriptorPool& descriptor_pool_, 98 VKDescriptorPool& descriptor_pool_,
76 VKUpdateDescriptorQueue& update_descriptor_queue_, 99 VKUpdateDescriptorQueue& update_descriptor_queue_,
77 VKRenderPassCache& renderpass_cache_, 100 const GraphicsPipelineCacheKey& key,
78 const GraphicsPipelineCacheKey& key_, 101 vk::Span<VkDescriptorSetLayoutBinding> bindings,
79 vk::Span<VkDescriptorSetLayoutBinding> bindings_, 102 const SPIRVProgram& program, u32 num_color_buffers)
80 const SPIRVProgram& program_) 103 : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
81 : device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()}, 104 descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
82 descriptor_set_layout{CreateDescriptorSetLayout(bindings_)},
83 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 105 descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
84 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, 106 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
85 descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( 107 descriptor_template{CreateDescriptorUpdateTemplate(program)},
86 program_)}, 108 modules(CreateShaderModules(program)),
87 renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, 109 pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
88 pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {}
89 110
90VKGraphicsPipeline::~VKGraphicsPipeline() = default; 111VKGraphicsPipeline::~VKGraphicsPipeline() = default;
91 112
@@ -179,8 +200,9 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
179 return shader_modules; 200 return shader_modules;
180} 201}
181 202
182vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, 203vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
183 const SPIRVProgram& program) const { 204 VkRenderPass renderpass,
205 u32 num_color_buffers) const {
184 const auto& state = cache_key.fixed_state; 206 const auto& state = cache_key.fixed_state;
185 const auto& viewport_swizzles = state.viewport_swizzles; 207 const auto& viewport_swizzles = state.viewport_swizzles;
186 208
@@ -190,11 +212,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
190 // state is ignored 212 // state is ignored
191 dynamic.raw1 = 0; 213 dynamic.raw1 = 0;
192 dynamic.raw2 = 0; 214 dynamic.raw2 = 0;
193 for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) { 215 dynamic.vertex_strides.fill(0);
194 // Enable all vertex bindings
195 binding.raw = 0;
196 binding.enabled.Assign(1);
197 }
198 } else { 216 } else {
199 dynamic = state.dynamic_state; 217 dynamic = state.dynamic_state;
200 } 218 }
@@ -202,19 +220,16 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
202 std::vector<VkVertexInputBindingDescription> vertex_bindings; 220 std::vector<VkVertexInputBindingDescription> vertex_bindings;
203 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; 221 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
204 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 222 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
205 const auto& binding = dynamic.vertex_bindings[index]; 223 if (state.attributes[index].binding_index_enabled == 0) {
206 if (!binding.enabled) {
207 continue; 224 continue;
208 } 225 }
209 const bool instanced = state.binding_divisors[index] != 0; 226 const bool instanced = state.binding_divisors[index] != 0;
210 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; 227 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
211
212 vertex_bindings.push_back({ 228 vertex_bindings.push_back({
213 .binding = static_cast<u32>(index), 229 .binding = static_cast<u32>(index),
214 .stride = binding.stride, 230 .stride = dynamic.vertex_strides[index],
215 .inputRate = rate, 231 .inputRate = rate,
216 }); 232 });
217
218 if (instanced) { 233 if (instanced) {
219 vertex_binding_divisors.push_back({ 234 vertex_binding_divisors.push_back({
220 .binding = static_cast<u32>(index), 235 .binding = static_cast<u32>(index),
@@ -290,8 +305,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
290 }; 305 };
291 306
292 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; 307 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
293 std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), 308 std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
294 UnpackViewportSwizzle);
295 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ 309 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
296 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, 310 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
297 .pNext = nullptr, 311 .pNext = nullptr,
@@ -326,7 +340,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
326 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 340 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
327 .pNext = nullptr, 341 .pNext = nullptr,
328 .flags = 0, 342 .flags = 0,
329 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, 343 .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
330 .sampleShadingEnable = VK_FALSE, 344 .sampleShadingEnable = VK_FALSE,
331 .minSampleShading = 0.0f, 345 .minSampleShading = 0.0f,
332 .pSampleMask = nullptr, 346 .pSampleMask = nullptr,
@@ -352,8 +366,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
352 }; 366 };
353 367
354 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; 368 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
355 const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); 369 for (std::size_t index = 0; index < num_color_buffers; ++index) {
356 for (std::size_t index = 0; index < num_attachments; ++index) {
357 static constexpr std::array COMPONENT_TABLE{ 370 static constexpr std::array COMPONENT_TABLE{
358 VK_COLOR_COMPONENT_R_BIT, 371 VK_COLOR_COMPONENT_R_BIT,
359 VK_COLOR_COMPONENT_G_BIT, 372 VK_COLOR_COMPONENT_G_BIT,
@@ -387,7 +400,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
387 .flags = 0, 400 .flags = 0,
388 .logicOpEnable = VK_FALSE, 401 .logicOpEnable = VK_FALSE,
389 .logicOp = VK_LOGIC_OP_COPY, 402 .logicOp = VK_LOGIC_OP_COPY,
390 .attachmentCount = static_cast<u32>(num_attachments), 403 .attachmentCount = num_color_buffers,
391 .pAttachments = cb_attachments.data(), 404 .pAttachments = cb_attachments.data(),
392 .blendConstants = {}, 405 .blendConstants = {},
393 }; 406 };
@@ -447,8 +460,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
447 stage_ci.pNext = &subgroup_size_ci; 460 stage_ci.pNext = &subgroup_size_ci;
448 } 461 }
449 } 462 }
450 463 return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
451 const VkGraphicsPipelineCreateInfo ci{
452 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 464 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
453 .pNext = nullptr, 465 .pNext = nullptr,
454 .flags = 0, 466 .flags = 0,
@@ -468,8 +480,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
468 .subpass = 0, 480 .subpass = 0,
469 .basePipelineHandle = nullptr, 481 .basePipelineHandle = nullptr,
470 .basePipelineIndex = 0, 482 .basePipelineIndex = 0,
471 }; 483 });
472 return device.GetLogical().CreateGraphicsPipeline(ci);
473} 484}
474 485
475} // namespace Vulkan 486} // namespace Vulkan
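The vertex-input rework above replaces the packed per-binding enabled/stride state with a flat vertex_strides array and keys binding participation off state.attributes[index].binding_index_enabled. In the branch where the fixed state is ignored (its condition sits outside this hunk, presumably the extended-dynamic-state path), the strides baked into the pipeline are zeroed because the real values are supplied at bind time. A sketch of that bind-time call, assuming VK_EXT_extended_dynamic_state is enabled and the entry point has been loaded via vkGetDeviceProcAddr (yuzu routes this through its command buffer wrapper):

#include <vulkan/vulkan.h>

// With VK_EXT_extended_dynamic_state, vertex strides travel with the bind
// call instead of being fixed in the pipeline; pSizes optionally bounds the
// readable range of each buffer.
void BindWithDynamicStrides(PFN_vkCmdBindVertexBuffers2EXT bind_vertex_buffers2,
                            VkCommandBuffer cmdbuf, uint32_t first_binding,
                            uint32_t binding_count, const VkBuffer* buffers,
                            const VkDeviceSize* offsets, const VkDeviceSize* sizes,
                            const VkDeviceSize* strides) {
    bind_vertex_buffers2(cmdbuf, first_binding, binding_count, buffers, offsets,
                         sizes, strides);
}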
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 3fb31d55a..8b6a98fe0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -8,20 +8,19 @@
8#include <optional> 8#include <optional>
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 13#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
13#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 14#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
14#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 15#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
16#include "video_core/renderer_vulkan/wrapper.h" 16#include "video_core/vulkan_common/vulkan_wrapper.h"
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
20using Maxwell = Tegra::Engines::Maxwell3D::Regs; 20using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21 21
22struct GraphicsPipelineCacheKey { 22struct GraphicsPipelineCacheKey {
23 RenderPassParams renderpass_params; 23 VkRenderPass renderpass;
24 u32 padding;
25 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; 24 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
26 FixedPipelineState fixed_state; 25 FixedPipelineState fixed_state;
27 26
@@ -34,16 +33,15 @@ struct GraphicsPipelineCacheKey {
34 } 33 }
35 34
36 std::size_t Size() const noexcept { 35 std::size_t Size() const noexcept {
37 return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); 36 return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
38 } 37 }
39}; 38};
40static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); 39static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
41static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); 40static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
42static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); 41static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
43 42
43class Device;
44class VKDescriptorPool; 44class VKDescriptorPool;
45class VKDevice;
46class VKRenderPassCache;
47class VKScheduler; 45class VKScheduler;
48class VKUpdateDescriptorQueue; 46class VKUpdateDescriptorQueue;
49 47
@@ -51,13 +49,12 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt
51 49
52class VKGraphicsPipeline final { 50class VKGraphicsPipeline final {
53public: 51public:
54 explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, 52 explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
55 VKDescriptorPool& descriptor_pool_, 53 VKDescriptorPool& descriptor_pool,
56 VKUpdateDescriptorQueue& update_descriptor_queue_, 54 VKUpdateDescriptorQueue& update_descriptor_queue_,
57 VKRenderPassCache& renderpass_cache_, 55 const GraphicsPipelineCacheKey& key,
58 const GraphicsPipelineCacheKey& key_, 56 vk::Span<VkDescriptorSetLayoutBinding> bindings,
59 vk::Span<VkDescriptorSetLayoutBinding> bindings_, 57 const SPIRVProgram& program, u32 num_color_buffers);
60 const SPIRVProgram& program_);
61 ~VKGraphicsPipeline(); 58 ~VKGraphicsPipeline();
62 59
63 VkDescriptorSet CommitDescriptorSet(); 60 VkDescriptorSet CommitDescriptorSet();
@@ -70,10 +67,6 @@ public:
70 return *layout; 67 return *layout;
71 } 68 }
72 69
73 VkRenderPass GetRenderPass() const {
74 return renderpass;
75 }
76
77 GraphicsPipelineCacheKey GetCacheKey() const { 70 GraphicsPipelineCacheKey GetCacheKey() const {
78 return cache_key; 71 return cache_key;
79 } 72 }
@@ -89,10 +82,10 @@ private:
89 82
90 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; 83 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
91 84
92 vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, 85 vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
93 const SPIRVProgram& program) const; 86 u32 num_color_buffers) const;
94 87
95 const VKDevice& device; 88 const Device& device;
96 VKScheduler& scheduler; 89 VKScheduler& scheduler;
97 const GraphicsPipelineCacheKey cache_key; 90 const GraphicsPipelineCacheKey cache_key;
98 const u64 hash; 91 const u64 hash;
@@ -104,7 +97,6 @@ private:
104 vk::DescriptorUpdateTemplateKHR descriptor_template; 97 vk::DescriptorUpdateTemplateKHR descriptor_template;
105 std::vector<vk::ShaderModule> modules; 98 std::vector<vk::ShaderModule> modules;
106 99
107 VkRenderPass renderpass;
108 vk::Pipeline pipeline; 100 vk::Pipeline pipeline;
109}; 101};
110 102
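GraphicsPipelineCacheKey is hashed and compared as raw bytes (the Hash() body sits outside this hunk), which is only sound when every byte of the struct participates in its value; that is exactly what the retained has_unique_object_representations_v assert enforces, and why the explicit padding word can go once the pointer-sized VkRenderPass handle replaces RenderPassParams. A sketch of the pattern those asserts protect; an FNV-1a loop stands in here to stay self-contained for what the real code most likely does with Common::CityHash64:

#include <cstddef>
#include <cstdint>
#include <type_traits>

// Byte-wise hashing is only valid when equal values share a single object
// representation, i.e. there are no padding bytes whose contents could
// differ between otherwise equal keys.
template <typename Key>
std::uint64_t HashKeyBytes(const Key& key, std::size_t size) {
    static_assert(std::has_unique_object_representations_v<Key>,
                  "padding bytes would make equal keys hash differently");
    static_assert(std::is_trivially_copyable_v<Key>);
    const auto* bytes = reinterpret_cast<const unsigned char*>(&key);
    std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a 64-bit offset basis
    for (std::size_t i = 0; i < size; ++i) {
        hash = (hash ^ bytes[i]) * 0x100000001b3ULL; // FNV-1a 64-bit prime
    }
    return hash;
}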
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
deleted file mode 100644
index 072d14e3b..000000000
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "common/assert.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_image.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13
14namespace Vulkan {
15
16VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_,
17 const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_)
18 : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_},
19 image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} {
20 UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0,
21 "Queue family tracking is not implemented");
22
23 image = device_.GetLogical().CreateImage(image_ci_);
24
25 const u32 num_ranges = image_num_layers * image_num_levels;
26 barriers.resize(num_ranges);
27 subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout});
28}
29
30VKImage::~VKImage() = default;
31
32void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
33 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
34 VkImageLayout new_layout) {
35 if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
36 return;
37 }
38
39 std::size_t cursor = 0;
40 for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
41 for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
42 const u32 layer = base_layer + layer_it;
43 const u32 level = base_level + level_it;
44 auto& state = GetSubrangeState(layer, level);
45 auto& barrier = barriers[cursor];
46 barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
47 barrier.pNext = nullptr;
48 barrier.srcAccessMask = state.access;
49 barrier.dstAccessMask = new_access;
50 barrier.oldLayout = state.layout;
51 barrier.newLayout = new_layout;
52 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
53 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
54 barrier.image = *image;
55 barrier.subresourceRange.aspectMask = aspect_mask;
56 barrier.subresourceRange.baseMipLevel = level;
57 barrier.subresourceRange.levelCount = 1;
58 barrier.subresourceRange.baseArrayLayer = layer;
59 barrier.subresourceRange.layerCount = 1;
60 state.access = new_access;
61 state.layout = new_layout;
62 }
63 }
64
65 scheduler.RequestOutsideRenderPassOperationContext();
66
67 scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) {
68 // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
69 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
70 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {},
71 vk::Span(barriers.data(), cursor));
72 });
73}
74
75bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
76 VkAccessFlags new_access, VkImageLayout new_layout) noexcept {
77 const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
78 base_level == 0 && num_levels == image_num_levels;
79 if (!is_full_range) {
80 state_diverged = true;
81 }
82
83 if (!state_diverged) {
84 auto& state = GetSubrangeState(0, 0);
85 if (state.access != new_access || state.layout != new_layout) {
86 return true;
87 }
88 }
89
90 for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
91 for (u32 level_it = 0; level_it < num_levels; ++level_it) {
92 const u32 layer = base_layer + layer_it;
93 const u32 level = base_level + level_it;
94 auto& state = GetSubrangeState(layer, level);
95 if (state.access != new_access || state.layout != new_layout) {
96 return true;
97 }
98 }
99 }
100 return false;
101}
102
103void VKImage::CreatePresentView() {
104 // Image type has to be 2D to be presented.
105 present_view = device.GetLogical().CreateImageView({
106 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
107 .pNext = nullptr,
108 .flags = 0,
109 .image = *image,
110 .viewType = VK_IMAGE_VIEW_TYPE_2D,
111 .format = format,
112 .components =
113 {
114 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
115 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
116 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
117 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
118 },
119 .subresourceRange =
120 {
121 .aspectMask = aspect_mask,
122 .baseMipLevel = 0,
123 .levelCount = 1,
124 .baseArrayLayer = 0,
125 .layerCount = 1,
126 },
127 });
128}
129
130VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
131 return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
132 static_cast<std::size_t>(level)];
133}
134
135} // namespace Vulkan \ No newline at end of file
135} // namespace Vulkan
\ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h
deleted file mode 100644
index 287ab90ca..000000000
--- a/src/video_core/renderer_vulkan/vk_image.h
+++ /dev/null
@@ -1,84 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/wrapper.h"
12
13namespace Vulkan {
14
15class VKDevice;
16class VKScheduler;
17
18class VKImage {
19public:
20 explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_,
21 const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_);
22 ~VKImage();
23
24 /// Records in the passed command buffer an image transition and updates the state of the image.
25 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
26 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
27 VkImageLayout new_layout);
28
 29 /// Returns a view compatible with presentation; the image has to be 2D.
30 VkImageView GetPresentView() {
31 if (!present_view) {
32 CreatePresentView();
33 }
34 return *present_view;
35 }
36
37 /// Returns the Vulkan image handler.
38 const vk::Image& GetHandle() const {
39 return image;
40 }
41
42 /// Returns the Vulkan format for this image.
43 VkFormat GetFormat() const {
44 return format;
45 }
46
47 /// Returns the Vulkan aspect mask.
48 VkImageAspectFlags GetAspectMask() const {
49 return aspect_mask;
50 }
51
52private:
53 struct SubrangeState final {
54 VkAccessFlags access = 0; ///< Current access bits.
55 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout.
56 };
57
58 bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
59 VkAccessFlags new_access, VkImageLayout new_layout) noexcept;
60
61 /// Creates a presentation view.
62 void CreatePresentView();
63
 64 /// Returns the subrange state for a layer and level.
65 SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
66
67 const VKDevice& device; ///< Device handler.
68 VKScheduler& scheduler; ///< Device scheduler.
69
70 const VkFormat format; ///< Vulkan format.
71 const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
72 const u32 image_num_layers; ///< Number of layers.
73 const u32 image_num_levels; ///< Number of mipmap levels.
74
75 vk::Image image; ///< Image handle.
76 vk::ImageView present_view; ///< Image view compatible with presentation.
77
78 std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers.
79 std::vector<SubrangeState> subrange_states; ///< Current subrange state.
80
81 bool state_diverged = false; ///< True when subresources mismatch in layout.
82};
83
84} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index ae26e558d..56ec5e380 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -6,15 +6,15 @@
6#include <chrono> 6#include <chrono>
7 7
8#include "core/settings.h" 8#include "core/settings.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_master_semaphore.h" 9#include "video_core/renderer_vulkan/vk_master_semaphore.h"
11#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/vulkan_common/vulkan_device.h"
11#include "video_core/vulkan_common/vulkan_wrapper.h"
12 12
13namespace Vulkan { 13namespace Vulkan {
14 14
15using namespace std::chrono_literals; 15using namespace std::chrono_literals;
16 16
17MasterSemaphore::MasterSemaphore(const VKDevice& device) { 17MasterSemaphore::MasterSemaphore(const Device& device) {
18 static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ 18 static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
19 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, 19 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
20 .pNext = nullptr, 20 .pNext = nullptr,
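The constructor shown above chains VkSemaphoreTypeCreateInfoKHR into the semaphore create info, which makes MasterSemaphore a single timeline semaphore whose monotonically increasing 64-bit counter tracks GPU progress. A raw-API sketch of the same construction plus a counter read, written against the Vulkan 1.2 core names (the diff uses the KHR aliases through yuzu's wrapper); error handling elided and the function names are illustrative:

#include <cstdint>

#include <vulkan/vulkan.h>

VkSemaphore MakeTimelineSemaphore(VkDevice device) {
    // Chaining VkSemaphoreTypeCreateInfo selects the timeline flavor;
    // without it, vkCreateSemaphore produces a binary semaphore.
    const VkSemaphoreTypeCreateInfo type_ci{
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
        .pNext = nullptr,
        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
        .initialValue = 0,
    };
    const VkSemaphoreCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
        .pNext = &type_ci,
        .flags = 0,
    };
    VkSemaphore semaphore{};
    vkCreateSemaphore(device, &ci, nullptr, &semaphore);
    return semaphore;
}

std::uint64_t QueryTick(VkDevice device, VkSemaphore semaphore) {
    // The counter only moves forward, so it can be polled to find out how
    // far the GPU has progressed through submitted work.
    std::uint64_t value{};
    vkGetSemaphoreCounterValue(device, semaphore, &value);
    return value;
}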
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 0e93706d7..f336f1862 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -8,15 +8,15 @@
8#include <thread> 8#include <thread>
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/wrapper.h" 11#include "video_core/vulkan_common/vulkan_wrapper.h"
12 12
13namespace Vulkan { 13namespace Vulkan {
14 14
15class VKDevice; 15class Device;
16 16
17class MasterSemaphore { 17class MasterSemaphore {
18public: 18public:
19 explicit MasterSemaphore(const VKDevice& device); 19 explicit MasterSemaphore(const Device& device);
20 ~MasterSemaphore(); 20 ~MasterSemaphore();
21 21
22 /// Returns the current logical tick. 22 /// Returns the current logical tick.
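
Since MasterSemaphore exposes a logical tick, the host-side primitive underneath is a timeline wait; a hedged sketch of the raw Vulkan 1.2 call it presumably wraps (WaitForTick is a hypothetical name):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    bool WaitForTick(VkDevice device, VkSemaphore semaphore, uint64_t tick) {
        const VkSemaphoreWaitInfo wait_info{
            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
            .pNext = nullptr,
            .flags = 0,
            .semaphoreCount = 1,
            .pSemaphores = &semaphore,
            .pValues = &tick,
        };
        // Blocks until the semaphore's counter reaches 'tick' or the timeout hits.
        return vkWaitSemaphores(device, &wait_info, UINT64_MAX) == VK_SUCCESS;
    }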
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index be53d450f..a6abd0eee 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -11,9 +11,9 @@
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_memory_manager.h" 14#include "video_core/renderer_vulkan/vk_memory_manager.h"
16#include "video_core/renderer_vulkan/wrapper.h" 15#include "video_core/vulkan_common/vulkan_device.h"
16#include "video_core/vulkan_common/vulkan_wrapper.h"
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
@@ -29,7 +29,7 @@ u64 GetAllocationChunkSize(u64 required_size) {
29 29
30class VKMemoryAllocation final { 30class VKMemoryAllocation final {
31public: 31public:
32 explicit VKMemoryAllocation(const VKDevice& device_, vk::DeviceMemory memory_, 32 explicit VKMemoryAllocation(const Device& device_, vk::DeviceMemory memory_,
33 VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_) 33 VkMemoryPropertyFlags properties_, u64 allocation_size_, u32 type_)
34 : device{device_}, memory{std::move(memory_)}, properties{properties_}, 34 : device{device_}, memory{std::move(memory_)}, properties{properties_},
35 allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {} 35 allocation_size{allocation_size_}, shifted_type{ShiftType(type_)} {}
@@ -104,7 +104,7 @@ private:
104 return std::nullopt; 104 return std::nullopt;
105 } 105 }
106 106
107 const VKDevice& device; ///< Vulkan device. 107 const Device& device; ///< Vulkan device.
108 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. 108 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
109 const VkMemoryPropertyFlags properties; ///< Vulkan properties. 109 const VkMemoryPropertyFlags properties; ///< Vulkan properties.
110 const u64 allocation_size; ///< Size of this allocation. 110 const u64 allocation_size; ///< Size of this allocation.
@@ -117,7 +117,7 @@ private:
117 std::vector<const VKMemoryCommitImpl*> commits; 117 std::vector<const VKMemoryCommitImpl*> commits;
118}; 118};
119 119
120VKMemoryManager::VKMemoryManager(const VKDevice& device_) 120VKMemoryManager::VKMemoryManager(const Device& device_)
121 : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} 121 : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {}
122 122
123VKMemoryManager::~VKMemoryManager() = default; 123VKMemoryManager::~VKMemoryManager() = default;
@@ -207,7 +207,7 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requi
207 return {}; 207 return {};
208} 208}
209 209
210VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, 210VKMemoryCommitImpl::VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
211 const vk::DeviceMemory& memory_, u64 begin_, u64 end_) 211 const vk::DeviceMemory& memory_, u64 begin_, u64 end_)
212 : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {} 212 : device{device_}, memory{memory_}, interval{begin_, end_}, allocation{allocation_} {}
213 213
@@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() {
216} 216}
217 217
218MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { 218MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
219 return MemoryMap{this, memory.Map(interval.first + offset_, size)}; 219 return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));
220} 220}
221 221
222void VKMemoryCommitImpl::Unmap() const { 222void VKMemoryCommitImpl::Unmap() const {
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 39f903ec8..2452bca4e 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -5,15 +5,16 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <span>
8#include <utility> 9#include <utility>
9#include <vector> 10#include <vector>
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "video_core/renderer_vulkan/wrapper.h" 12#include "video_core/vulkan_common/vulkan_wrapper.h"
12 13
13namespace Vulkan { 14namespace Vulkan {
14 15
16class Device;
15class MemoryMap; 17class MemoryMap;
16class VKDevice;
17class VKMemoryAllocation; 18class VKMemoryAllocation;
18class VKMemoryCommitImpl; 19class VKMemoryCommitImpl;
19 20
@@ -21,7 +22,7 @@ using VKMemoryCommit = std::unique_ptr<VKMemoryCommitImpl>;
21 22
22class VKMemoryManager final { 23class VKMemoryManager final {
23public: 24public:
24 explicit VKMemoryManager(const VKDevice& device_); 25 explicit VKMemoryManager(const Device& device_);
25 VKMemoryManager(const VKMemoryManager&) = delete; 26 VKMemoryManager(const VKMemoryManager&) = delete;
26 ~VKMemoryManager(); 27 ~VKMemoryManager();
27 28
@@ -48,7 +49,7 @@ private:
48 VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, 49 VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
49 VkMemoryPropertyFlags wanted_properties); 50 VkMemoryPropertyFlags wanted_properties);
50 51
51 const VKDevice& device; ///< Device handler. 52 const Device& device; ///< Device handler.
52 const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. 53 const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
53 std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. 54 std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
54}; 55};
@@ -58,7 +59,7 @@ class VKMemoryCommitImpl final {
58 friend MemoryMap; 59 friend MemoryMap;
59 60
60public: 61public:
61 explicit VKMemoryCommitImpl(const VKDevice& device_, VKMemoryAllocation* allocation_, 62 explicit VKMemoryCommitImpl(const Device& device_, VKMemoryAllocation* allocation_,
62 const vk::DeviceMemory& memory_, u64 begin_, u64 end_); 63 const vk::DeviceMemory& memory_, u64 begin_, u64 end_);
63 ~VKMemoryCommitImpl(); 64 ~VKMemoryCommitImpl();
64 65
@@ -84,7 +85,7 @@ private:
84 /// Unmaps memory. 85 /// Unmaps memory.
85 void Unmap() const; 86 void Unmap() const;
86 87
87 const VKDevice& device; ///< Vulkan device. 88 const Device& device; ///< Vulkan device.
88 const vk::DeviceMemory& memory; ///< Vulkan device memory handler. 89 const vk::DeviceMemory& memory; ///< Vulkan device memory handler.
89 std::pair<u64, u64> interval{}; ///< Interval where the commit exists. 90 std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
90 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. 91 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
@@ -93,8 +94,8 @@ private:
93/// Holds ownership of a memory map. 94/// Holds ownership of a memory map.
94class MemoryMap final { 95class MemoryMap final {
95public: 96public:
96 explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) 97 explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
97 : commit{commit_}, address{address_} {} 98 : commit{commit_}, span{span_} {}
98 99
99 ~MemoryMap() { 100 ~MemoryMap() {
100 if (commit) { 101 if (commit) {
@@ -108,19 +109,24 @@ public:
108 commit = nullptr; 109 commit = nullptr;
109 } 110 }
110 111
112 /// Returns a span to the memory map.
113 [[nodiscard]] std::span<u8> Span() const noexcept {
114 return span;
115 }
116
111 /// Returns the address of the memory map. 117 /// Returns the address of the memory map.
112 u8* GetAddress() const { 118 [[nodiscard]] u8* Address() const noexcept {
113 return address; 119 return span.data();
114 } 120 }
115 121
116 /// Returns the address of the memory map. 122 /// Returns the address of the memory map.
117 operator u8*() const { 123 [[nodiscard]] operator u8*() const noexcept {
118 return address; 124 return span.data();
119 } 125 }
120 126
121private: 127private:
122 const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. 128 const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
123 u8* address{}; ///< Address to the mapped memory. 129 std::span<u8> span; ///< Span of the mapped memory.
124}; 130};
125 131
126} // namespace Vulkan 132} // namespace Vulkan
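
The switch from a raw u8* to std::span<u8> keeps the mapping's size attached to its address. A hedged usage sketch built on the declarations above (UploadBytes is hypothetical; the Map(size, offset) signature is from the .cpp hunk):

    #include <cstring>
    #include <span>

    void UploadBytes(const Vulkan::VKMemoryCommitImpl& commit,
                     std::span<const u8> src) {
        const Vulkan::MemoryMap map = commit.Map(src.size_bytes(), /*offset_=*/0);
        std::memcpy(map.Span().data(), src.data(), src.size_bytes());
    } // ~MemoryMap() releases the mapping when it goes out of scope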
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3fb264d03..02282e36f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/bit_cast.h" 10#include "common/bit_cast.h"
11#include "common/cityhash.h"
11#include "common/microprofile.h" 12#include "common/microprofile.h"
12#include "core/core.h" 13#include "core/core.h"
13#include "core/memory.h" 14#include "core/memory.h"
@@ -18,18 +19,17 @@
18#include "video_core/renderer_vulkan/maxwell_to_vk.h" 19#include "video_core/renderer_vulkan/maxwell_to_vk.h"
19#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 20#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
20#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 21#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
21#include "video_core/renderer_vulkan/vk_device.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 23#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 24#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 25#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_update_descriptor.h" 26#include "video_core/renderer_vulkan/vk_update_descriptor.h"
28#include "video_core/renderer_vulkan/wrapper.h"
29#include "video_core/shader/compiler_settings.h" 27#include "video_core/shader/compiler_settings.h"
30#include "video_core/shader/memory_util.h" 28#include "video_core/shader/memory_util.h"
31#include "video_core/shader_cache.h" 29#include "video_core/shader_cache.h"
32#include "video_core/shader_notify.h" 30#include "video_core/shader_notify.h"
31#include "video_core/vulkan_common/vulkan_device.h"
32#include "video_core/vulkan_common/vulkan_wrapper.h"
33 33
34namespace Vulkan { 34namespace Vulkan {
35 35
@@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX
52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
53 53
54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
55 VideoCommon::Shader::CompileDepth::FullDecompile}; 55 .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
56 .disable_else_derivation = true,
57};
56 58
57constexpr std::size_t GetStageFromProgram(std::size_t program) { 59constexpr std::size_t GetStageFromProgram(std::size_t program) {
58 return program == 0 ? 0 : program - 1; 60 return program == 0 ? 0 : program - 1;
@@ -147,14 +149,13 @@ Shader::~Shader() = default;
147VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, 149VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
148 Tegra::Engines::Maxwell3D& maxwell3d_, 150 Tegra::Engines::Maxwell3D& maxwell3d_,
149 Tegra::Engines::KeplerCompute& kepler_compute_, 151 Tegra::Engines::KeplerCompute& kepler_compute_,
150 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, 152 Tegra::MemoryManager& gpu_memory_, const Device& device_,
151 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 153 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
152 VKUpdateDescriptorQueue& update_descriptor_queue_, 154 VKUpdateDescriptorQueue& update_descriptor_queue_)
153 VKRenderPassCache& renderpass_cache_) 155 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
154 : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, 156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
155 gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, 157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
156 descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, 158 update_descriptor_queue_} {}
157 renderpass_cache{renderpass_cache_} {}
158 159
159VKPipelineCache::~VKPipelineCache() = default; 160VKPipelineCache::~VKPipelineCache() = default;
160 161
@@ -199,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
199} 200}
200 201
201VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( 202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
202 const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { 203 const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
204 VideoCommon::Shader::AsyncShaders& async_shaders) {
203 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 205 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
204 206
205 if (last_graphics_pipeline && last_graphics_key == key) { 207 if (last_graphics_pipeline && last_graphics_key == key) {
@@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
215 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 217 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
216 const auto [program, bindings] = DecompileShaders(key.fixed_state); 218 const auto [program, bindings] = DecompileShaders(key.fixed_state);
217 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, 219 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
218 update_descriptor_queue, renderpass_cache, bindings, 220 update_descriptor_queue, bindings, program, key,
219 program, key); 221 num_color_buffers);
220 } 222 }
221 last_graphics_pipeline = pair->second.get(); 223 last_graphics_pipeline = pair->second.get();
222 return last_graphics_pipeline; 224 return last_graphics_pipeline;
@@ -229,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
229 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 231 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
230 const auto [program, bindings] = DecompileShaders(key.fixed_state); 232 const auto [program, bindings] = DecompileShaders(key.fixed_state);
231 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, 233 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
232 update_descriptor_queue, renderpass_cache, key, 234 update_descriptor_queue, key, bindings,
233 bindings, program); 235 program, num_color_buffers);
234 gpu.ShaderNotify().MarkShaderComplete(); 236 gpu.ShaderNotify().MarkShaderComplete();
235 } 237 }
236 last_graphics_pipeline = entry.get(); 238 last_graphics_pipeline = entry.get();
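
The compiler_settings change above also moves from positional aggregate initialization to C++20 designated initializers. A standalone illustration (hypothetical struct, not yuzu's) of the readability and ordering rules involved:

    // C++20: designated initializers must follow the declaration order.
    struct Settings {
        int depth = 0;
        bool disable_else_derivation = false;
    };

    constexpr Settings settings{
        .depth = 2,
        .disable_else_derivation = true, // named, instead of a bare 'true'
    };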
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 9e1f8fcbb..89d635a3d 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -19,14 +19,13 @@
19#include "video_core/engines/maxwell_3d.h" 19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
22#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
23#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 22#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
24#include "video_core/renderer_vulkan/wrapper.h"
25#include "video_core/shader/async_shaders.h" 23#include "video_core/shader/async_shaders.h"
26#include "video_core/shader/memory_util.h" 24#include "video_core/shader/memory_util.h"
27#include "video_core/shader/registry.h" 25#include "video_core/shader/registry.h"
28#include "video_core/shader/shader_ir.h" 26#include "video_core/shader/shader_ir.h"
29#include "video_core/shader_cache.h" 27#include "video_core/shader_cache.h"
28#include "video_core/vulkan_common/vulkan_wrapper.h"
30 29
31namespace Core { 30namespace Core {
32class System; 31class System;
@@ -34,10 +33,10 @@ class System;
34 33
35namespace Vulkan { 34namespace Vulkan {
36 35
36class Device;
37class RasterizerVulkan; 37class RasterizerVulkan;
38class VKComputePipeline; 38class VKComputePipeline;
39class VKDescriptorPool; 39class VKDescriptorPool;
40class VKDevice;
41class VKScheduler; 40class VKScheduler;
42class VKUpdateDescriptorQueue; 41class VKUpdateDescriptorQueue;
43 42
@@ -119,18 +118,18 @@ private:
119 118
120class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { 119class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
121public: 120public:
122 explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, 121 explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
123 Tegra::Engines::Maxwell3D& maxwell3d_, 122 Tegra::Engines::Maxwell3D& maxwell3d,
124 Tegra::Engines::KeplerCompute& kepler_compute_, 123 Tegra::Engines::KeplerCompute& kepler_compute,
125 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, 124 Tegra::MemoryManager& gpu_memory, const Device& device,
126 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 125 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
127 VKUpdateDescriptorQueue& update_descriptor_queue_, 126 VKUpdateDescriptorQueue& update_descriptor_queue);
128 VKRenderPassCache& renderpass_cache_);
129 ~VKPipelineCache() override; 127 ~VKPipelineCache() override;
130 128
131 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); 129 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
132 130
133 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, 131 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
132 u32 num_color_buffers,
134 VideoCommon::Shader::AsyncShaders& async_shaders); 133 VideoCommon::Shader::AsyncShaders& async_shaders);
135 134
136 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 135 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
@@ -149,11 +148,10 @@ private:
149 Tegra::Engines::KeplerCompute& kepler_compute; 148 Tegra::Engines::KeplerCompute& kepler_compute;
150 Tegra::MemoryManager& gpu_memory; 149 Tegra::MemoryManager& gpu_memory;
151 150
152 const VKDevice& device; 151 const Device& device;
153 VKScheduler& scheduler; 152 VKScheduler& scheduler;
154 VKDescriptorPool& descriptor_pool; 153 VKDescriptorPool& descriptor_pool;
155 VKUpdateDescriptorQueue& update_descriptor_queue; 154 VKUpdateDescriptorQueue& update_descriptor_queue;
156 VKRenderPassCache& renderpass_cache;
157 155
158 std::unique_ptr<Shader> null_shader; 156 std::unique_ptr<Shader> null_shader;
159 std::unique_ptr<Shader> null_kernel; 157 std::unique_ptr<Shader> null_kernel;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 038760de3..7cadd5147 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -7,11 +7,11 @@
7#include <utility> 7#include <utility>
8#include <vector> 8#include <vector>
9 9
10#include "video_core/renderer_vulkan/vk_device.h"
11#include "video_core/renderer_vulkan/vk_query_cache.h" 10#include "video_core/renderer_vulkan/vk_query_cache.h"
12#include "video_core/renderer_vulkan/vk_resource_pool.h" 11#include "video_core/renderer_vulkan/vk_resource_pool.h"
13#include "video_core/renderer_vulkan/vk_scheduler.h" 12#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/wrapper.h" 13#include "video_core/vulkan_common/vulkan_device.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h"
15 15
16namespace Vulkan { 16namespace Vulkan {
17 17
@@ -27,7 +27,7 @@ constexpr VkQueryType GetTarget(QueryType type) {
27 27
28} // Anonymous namespace 28} // Anonymous namespace
29 29
30QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_) 30QueryPool::QueryPool(const Device& device_, VKScheduler& scheduler, QueryType type_)
31 : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} 31 : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
32 32
33QueryPool::~QueryPool() = default; 33QueryPool::~QueryPool() = default;
@@ -68,7 +68,7 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
68 68
69VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, 69VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
70 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 70 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
71 const VKDevice& device_, VKScheduler& scheduler_) 71 const Device& device_, VKScheduler& scheduler_)
72 : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_}, 72 : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
73 query_pools{ 73 query_pools{
74 QueryPool{device_, scheduler_, QueryType::SamplesPassed}, 74 QueryPool{device_, scheduler_, QueryType::SamplesPassed},
@@ -96,9 +96,9 @@ void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
96HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_, 96HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
97 QueryType type_) 97 QueryType type_)
98 : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, 98 : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_},
99 query{cache_.AllocateQuery(type_)}, tick{cache_.Scheduler().CurrentTick()} { 99 query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
100 const vk::Device* logical = &cache_.Device().GetLogical(); 100 const vk::Device* logical = &cache.GetDevice().GetLogical();
101 cache_.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { 101 cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
102 logical->ResetQueryPoolEXT(query.first, query.second, 1); 102 logical->ResetQueryPoolEXT(query.first, query.second, 1);
103 cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); 103 cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
104 }); 104 });
@@ -109,17 +109,17 @@ HostCounter::~HostCounter() {
109} 109}
110 110
111void HostCounter::EndQuery() { 111void HostCounter::EndQuery() {
112 cache.Scheduler().Record( 112 cache.GetScheduler().Record(
113 [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); 113 [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
114} 114}
115 115
116u64 HostCounter::BlockingQuery() const { 116u64 HostCounter::BlockingQuery() const {
117 if (tick >= cache.Scheduler().CurrentTick()) { 117 if (tick >= cache.GetScheduler().CurrentTick()) {
118 cache.Scheduler().Flush(); 118 cache.GetScheduler().Flush();
119 } 119 }
120 120
121 u64 data; 121 u64 data;
122 const VkResult query_result = cache.Device().GetLogical().GetQueryResults( 122 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
123 query.first, query.second, 1, sizeof(data), &data, sizeof(data), 123 query.first, query.second, 1, sizeof(data), &data, sizeof(data),
124 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); 124 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
125 125
@@ -127,7 +127,7 @@ u64 HostCounter::BlockingQuery() const {
127 case VK_SUCCESS: 127 case VK_SUCCESS:
128 return data; 128 return data;
129 case VK_ERROR_DEVICE_LOST: 129 case VK_ERROR_DEVICE_LOST:
130 cache.Device().ReportLoss(); 130 cache.GetDevice().ReportLoss();
131 [[fallthrough]]; 131 [[fallthrough]];
132 default: 132 default:
133 throw vk::Exception(query_result); 133 throw vk::Exception(query_result);
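
BlockingQuery above flushes the scheduler when the query's tick has not been submitted yet, then performs a blocking readback. A self-contained sketch of the raw call behind the GetQueryResults wrapper (ReadQuery is a hypothetical name):

    #include <cstdint>
    #include <stdexcept>
    #include <vulkan/vulkan.h>

    uint64_t ReadQuery(VkDevice device, VkQueryPool pool, uint32_t index) {
        uint64_t data = 0;
        // VK_QUERY_RESULT_WAIT_BIT blocks until the GPU has written the value.
        const VkResult result = vkGetQueryPoolResults(
            device, pool, index, /*queryCount=*/1, sizeof(data), &data,
            /*stride=*/sizeof(data),
            VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
        if (result != VK_SUCCESS) {
            // The code above treats VK_ERROR_DEVICE_LOST specially before throwing.
            throw std::runtime_error("vkGetQueryPoolResults failed");
        }
        return data;
    }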
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 837fe9ebf..7190946b9 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -12,7 +12,7 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/query_cache.h" 13#include "video_core/query_cache.h"
14#include "video_core/renderer_vulkan/vk_resource_pool.h" 14#include "video_core/renderer_vulkan/vk_resource_pool.h"
15#include "video_core/renderer_vulkan/wrapper.h" 15#include "video_core/vulkan_common/vulkan_wrapper.h"
16 16
17namespace VideoCore { 17namespace VideoCore {
18class RasterizerInterface; 18class RasterizerInterface;
@@ -21,8 +21,8 @@ class RasterizerInterface;
21namespace Vulkan { 21namespace Vulkan {
22 22
23class CachedQuery; 23class CachedQuery;
24class Device;
24class HostCounter; 25class HostCounter;
25class VKDevice;
26class VKQueryCache; 26class VKQueryCache;
27class VKScheduler; 27class VKScheduler;
28 28
@@ -30,7 +30,7 @@ using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
30 30
31class QueryPool final : public ResourcePool { 31class QueryPool final : public ResourcePool {
32public: 32public:
33 explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type); 33 explicit QueryPool(const Device& device, VKScheduler& scheduler, VideoCore::QueryType type);
34 ~QueryPool() override; 34 ~QueryPool() override;
35 35
36 std::pair<VkQueryPool, u32> Commit(); 36 std::pair<VkQueryPool, u32> Commit();
@@ -43,7 +43,7 @@ protected:
43private: 43private:
44 static constexpr std::size_t GROW_STEP = 512; 44 static constexpr std::size_t GROW_STEP = 512;
45 45
46 const VKDevice& device; 46 const Device& device;
47 const VideoCore::QueryType type; 47 const VideoCore::QueryType type;
48 48
49 std::vector<vk::QueryPool> pools; 49 std::vector<vk::QueryPool> pools;
@@ -55,23 +55,23 @@ class VKQueryCache final
55public: 55public:
56 explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_, 56 explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer_,
57 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 57 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
58 const VKDevice& device_, VKScheduler& scheduler_); 58 const Device& device_, VKScheduler& scheduler_);
59 ~VKQueryCache(); 59 ~VKQueryCache();
60 60
61 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); 61 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
62 62
63 void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); 63 void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
64 64
65 const VKDevice& Device() const noexcept { 65 const Device& GetDevice() const noexcept {
66 return device; 66 return device;
67 } 67 }
68 68
69 VKScheduler& Scheduler() const noexcept { 69 VKScheduler& GetScheduler() const noexcept {
70 return scheduler; 70 return scheduler;
71 } 71 }
72 72
73private: 73private:
74 const VKDevice& device; 74 const Device& device;
75 VKScheduler& scheduler; 75 VKScheduler& scheduler;
76 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; 76 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
77}; 77};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f93986aab..93fbea510 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -19,6 +19,7 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "video_core/engines/kepler_compute.h" 20#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/blit_image.h"
22#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
23#include "video_core/renderer_vulkan/maxwell_to_vk.h" 24#include "video_core/renderer_vulkan/maxwell_to_vk.h"
24#include "video_core/renderer_vulkan/renderer_vulkan.h" 25#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -26,23 +27,24 @@
26#include "video_core/renderer_vulkan/vk_compute_pass.h" 27#include "video_core/renderer_vulkan/vk_compute_pass.h"
27#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 28#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
28#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 29#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
29#include "video_core/renderer_vulkan/vk_device.h"
30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
31#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 31#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
32#include "video_core/renderer_vulkan/vk_rasterizer.h" 32#include "video_core/renderer_vulkan/vk_rasterizer.h"
33#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
34#include "video_core/renderer_vulkan/vk_sampler_cache.h"
35#include "video_core/renderer_vulkan/vk_scheduler.h" 33#include "video_core/renderer_vulkan/vk_scheduler.h"
36#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 34#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
37#include "video_core/renderer_vulkan/vk_state_tracker.h" 35#include "video_core/renderer_vulkan/vk_state_tracker.h"
38#include "video_core/renderer_vulkan/vk_texture_cache.h" 36#include "video_core/renderer_vulkan/vk_texture_cache.h"
39#include "video_core/renderer_vulkan/vk_update_descriptor.h" 37#include "video_core/renderer_vulkan/vk_update_descriptor.h"
40#include "video_core/renderer_vulkan/wrapper.h"
41#include "video_core/shader_cache.h" 38#include "video_core/shader_cache.h"
39#include "video_core/texture_cache/texture_cache.h"
40#include "video_core/vulkan_common/vulkan_device.h"
41#include "video_core/vulkan_common/vulkan_wrapper.h"
42 42
43namespace Vulkan { 43namespace Vulkan {
44 44
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 45using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46using VideoCommon::ImageViewId;
47using VideoCommon::ImageViewType;
46 48
47MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); 49MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
48MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); 50MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
@@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192
58 60
59namespace { 61namespace {
60 62
61constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); 63constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
62 64
63VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { 65VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
64 const auto& src = regs.viewport_transform[index]; 66 const auto& src = regs.viewport_transform[index];
65 const float width = src.scale_x * 2.0f; 67 const float width = src.scale_x * 2.0f;
66 const float height = src.scale_y * 2.0f; 68 const float height = src.scale_y * 2.0f;
@@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si
83 return viewport; 85 return viewport;
84} 86}
85 87
86VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { 88VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
87 const auto& src = regs.scissor_test[index]; 89 const auto& src = regs.scissor_test[index];
88 VkRect2D scissor; 90 VkRect2D scissor;
89 if (src.enable) { 91 if (src.enable) {
@@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
103std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( 105std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
104 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 106 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
105 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; 107 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
106 for (std::size_t i = 0; i < std::size(addresses); ++i) { 108 for (size_t i = 0; i < std::size(addresses); ++i) {
107 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; 109 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
108 } 110 }
109 return addresses; 111 return addresses;
110} 112}
111 113
112void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, 114struct TextureHandle {
113 VkAccessFlags access) { 115 constexpr TextureHandle(u32 data, bool via_header_index) {
114 for (auto& [view, layout] : views) { 116 const Tegra::Texture::TextureHandle handle{data};
115 view->Transition(*layout, pipeline_stage, access); 117 image = handle.tic_id;
118 sampler = via_header_index ? image : handle.tsc_id.Value();
116 } 119 }
117} 120
121 u32 image;
122 u32 sampler;
123};
118 124
119template <typename Engine, typename Entry> 125template <typename Engine, typename Entry>
120Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 126TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
121 std::size_t stage, std::size_t index = 0) { 127 size_t stage, size_t index = 0) {
122 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 128 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
123 if constexpr (std::is_same_v<Entry, SamplerEntry>) { 129 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
124 if (entry.is_separated) { 130 if (entry.is_separated) {
125 const u32 buffer_1 = entry.buffer; 131 const u32 buffer_1 = entry.buffer;
126 const u32 buffer_2 = entry.secondary_buffer; 132 const u32 buffer_2 = entry.secondary_buffer;
127 const u32 offset_1 = entry.offset; 133 const u32 offset_1 = entry.offset;
128 const u32 offset_2 = entry.secondary_offset; 134 const u32 offset_2 = entry.secondary_offset;
129 const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); 135 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
130 const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); 136 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
131 return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); 137 return TextureHandle(handle_1 | handle_2, via_header_index);
132 } 138 }
133 } 139 }
134 if (entry.is_bindless) { 140 if (entry.is_bindless) {
135 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); 141 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
136 return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); 142 return TextureHandle(raw, via_header_index);
137 }
138 const auto& gpu_profile = engine.AccessGuestDriverProfile();
139 const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
140 const u32 offset = entry.offset + entry_offset;
141 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
142 return engine.GetStageTexture(stage_type, offset);
143 } else {
144 return engine.GetTexture(offset);
145 }
146}
147
148/// @brief Determine if an attachment to be updated has to preserve contents
149/// @param is_clear True when a clear is being executed
150/// @param regs 3D registers
151/// @return True when the contents have to be preserved
152bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
153 if (!is_clear) {
154 return true;
155 }
156 // First we have to make sure all clear masks are enabled.
157 if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
158 !regs.clear_buffers.A) {
159 return true;
160 }
161 // If scissors are disabled, the whole screen is cleared
162 if (!regs.clear_flags.scissor) {
163 return false;
164 } 143 }
165 // Then we have to confirm scissor testing clears the whole image 144 const u32 buffer = engine.GetBoundBuffer();
166 const std::size_t index = regs.clear_buffers.RT; 145 const u64 offset = (entry.offset + index) * sizeof(u32);
167 const auto& scissor = regs.scissor_test[0]; 146 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
168 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
169 scissor.max_y < regs.rt[index].height;
170} 147}
171 148
172/// @brief Determine if an attachment to be updated has to preserve contents 149template <size_t N>
173/// @param is_clear True when a clear is being executed
174/// @param regs 3D registers
175/// @return True when the contents have to be preserved
176bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
177 // If we are not clearing, the contents have to be preserved
178 if (!is_clear) {
179 return true;
180 }
181 // For depth stencil clears we only have to confirm scissor test covers the whole image
182 if (!regs.clear_flags.scissor) {
183 return false;
184 }
185 // Make sure the clear cover the whole image
186 const auto& scissor = regs.scissor_test[0];
187 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
188 scissor.max_y < regs.zeta_height;
189}
190
191template <std::size_t N>
192std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { 150std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
193 std::array<VkDeviceSize, N> expanded; 151 std::array<VkDeviceSize, N> expanded;
194 std::copy(strides.begin(), strides.end(), expanded.begin()); 152 std::copy(strides.begin(), strides.end(), expanded.begin());
195 return expanded; 153 return expanded;
196} 154}
197 155
156ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
157 if (entry.is_buffer) {
158 return ImageViewType::e2D;
159 }
160 switch (entry.type) {
161 case Tegra::Shader::TextureType::Texture1D:
162 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
163 case Tegra::Shader::TextureType::Texture2D:
164 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
165 case Tegra::Shader::TextureType::Texture3D:
166 return ImageViewType::e3D;
167 case Tegra::Shader::TextureType::TextureCube:
168 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
169 }
170 UNREACHABLE();
171 return ImageViewType::e2D;
172}
173
174ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
175 switch (entry.type) {
176 case Tegra::Shader::ImageType::Texture1D:
177 return ImageViewType::e1D;
178 case Tegra::Shader::ImageType::Texture1DArray:
179 return ImageViewType::e1DArray;
180 case Tegra::Shader::ImageType::Texture2D:
181 return ImageViewType::e2D;
182 case Tegra::Shader::ImageType::Texture2DArray:
183 return ImageViewType::e2DArray;
184 case Tegra::Shader::ImageType::Texture3D:
185 return ImageViewType::e3D;
186 case Tegra::Shader::ImageType::TextureBuffer:
187 return ImageViewType::Buffer;
188 }
189 UNREACHABLE();
190 return ImageViewType::e2D;
191}
192
193void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
194 VKUpdateDescriptorQueue& update_descriptor_queue,
195 ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
196 for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
197 const ImageViewId image_view_id = *image_view_id_ptr++;
198 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
199 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
200 }
201 for (const auto& entry : entries.samplers) {
202 for (size_t i = 0; i < entry.size; ++i) {
203 const VkSampler sampler = *sampler_ptr++;
204 const ImageViewId image_view_id = *image_view_id_ptr++;
205 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
206 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
207 update_descriptor_queue.AddSampledImage(handle, sampler);
208 }
209 }
210 for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
211 const ImageViewId image_view_id = *image_view_id_ptr++;
212 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
213 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
214 }
215 for (const auto& entry : entries.images) {
216 // TODO: Mark as modified
217 const ImageViewId image_view_id = *image_view_id_ptr++;
218 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
219 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
220 update_descriptor_queue.AddImage(handle);
221 }
222}
223
198} // Anonymous namespace 224} // Anonymous namespace
199 225
200class BufferBindings final { 226class BufferBindings final {
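
Regarding the TextureHandle helper introduced earlier in this hunk: when via_header_index is set, the TIC index doubles as the sampler index; otherwise the TSC id comes from the same packed word. An illustrative unpacking, assuming the usual 20-bit TIC / 12-bit TSC split of Maxwell texture handles:

    #include <cstdint>

    struct UnpackedHandle {
        uint32_t image;   // TIC (texture image control) index
        uint32_t sampler; // TSC (texture sampler control) index
    };

    UnpackedHandle Unpack(uint32_t data, bool via_header_index) {
        const uint32_t tic_id = data & 0xFFFFFu;       // bits [0, 20)
        const uint32_t tsc_id = (data >> 20) & 0xFFFu; // bits [20, 32)
        return {tic_id, via_header_index ? tic_id : tsc_id};
    }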
@@ -213,7 +239,7 @@ public:
213 index.type = type; 239 index.type = type;
214 } 240 }
215 241
216 void Bind(const VKDevice& device, VKScheduler& scheduler) const { 242 void Bind(const Device& device, VKScheduler& scheduler) const {
217 // Use this large switch case to avoid dispatching more memory in the record lambda than 243 // Use this large switch case to avoid dispatching more memory in the record lambda than
218 // what we need. It looks horrible, but it's the best we can do on standard C++. 244 // what we need. It looks horrible, but it's the best we can do on standard C++.
219 switch (vertex.num_buffers) { 245 switch (vertex.num_buffers) {
@@ -290,7 +316,7 @@ public:
290private: 316private:
291 // Some of these fields are intentionally left uninitialized to avoid initializing them twice. 317 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
292 struct { 318 struct {
293 std::size_t num_buffers = 0; 319 size_t num_buffers = 0;
294 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; 320 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
295 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; 321 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
296 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; 322 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
@@ -303,8 +329,8 @@ private:
303 VkIndexType type; 329 VkIndexType type;
304 } index; 330 } index;
305 331
306 template <std::size_t N> 332 template <size_t N>
307 void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { 333 void BindStatic(const Device& device, VKScheduler& scheduler) const {
308 if (device.IsExtExtendedDynamicStateSupported()) { 334 if (device.IsExtExtendedDynamicStateSupported()) {
309 if (index.buffer) { 335 if (index.buffer) {
310 BindStatic<N, true, true>(scheduler); 336 BindStatic<N, true, true>(scheduler);
@@ -320,7 +346,7 @@ private:
320 } 346 }
321 } 347 }
322 348
323 template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> 349 template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
324 void BindStatic(VKScheduler& scheduler) const { 350 void BindStatic(VKScheduler& scheduler) const {
325 static_assert(N <= Maxwell::NumVertexArrays); 351 static_assert(N <= Maxwell::NumVertexArrays);
326 if constexpr (N == 0) { 352 if constexpr (N == 0) {
@@ -383,22 +409,25 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
383RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 409RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
384 Tegra::MemoryManager& gpu_memory_, 410 Tegra::MemoryManager& gpu_memory_,
385 Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, 411 Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
386 const VKDevice& device_, VKMemoryManager& memory_manager_, 412 const Device& device_, VKMemoryManager& memory_manager_,
387 StateTracker& state_tracker_, VKScheduler& scheduler_) 413 StateTracker& state_tracker_, VKScheduler& scheduler_)
388 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), 414 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
389 maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), 415 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
390 device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), 416 screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_},
391 scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), 417 state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
392 descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), 418 staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler),
393 renderpass_cache(device), 419 update_descriptor_queue(device, scheduler),
420 blit_image(device, scheduler, state_tracker, descriptor_pool),
394 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 421 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
395 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 422 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
396 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 423 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
397 texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), 424 texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image},
425 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
398 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 426 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
399 descriptor_pool, update_descriptor_queue, renderpass_cache), 427 descriptor_pool, update_descriptor_queue),
400 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), 428 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer,
401 sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), 429 staging_pool),
430 query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
402 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, 431 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
403 scheduler), 432 scheduler),
404 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 433 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
@@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
427 const DrawParameters draw_params = 456 const DrawParameters draw_params =
428 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); 457 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
429 458
430 update_descriptor_queue.Acquire(); 459 auto lock = texture_cache.AcquireLock();
431 sampled_views.clear(); 460 texture_cache.SynchronizeGraphicsDescriptors();
432 image_views.clear(); 461
462 texture_cache.UpdateRenderTargets(false);
433 463
434 const auto shaders = pipeline_cache.GetShaders(); 464 const auto shaders = pipeline_cache.GetShaders();
435 key.shaders = GetShaderAddresses(shaders); 465 key.shaders = GetShaderAddresses(shaders);
@@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
437 467
438 buffer_cache.Unmap(); 468 buffer_cache.Unmap();
439 469
440 const Texceptions texceptions = UpdateAttachments(false); 470 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
441 SetupImageTransitions(texceptions, color_attachments, zeta_attachment); 471 key.renderpass = framebuffer->RenderPass();
442
443 key.renderpass_params = GetRenderPassParams(texceptions);
444 key.padding = 0;
445 472
446 auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); 473 auto* const pipeline =
474 pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders);
447 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { 475 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
448 // Async graphics pipeline was not ready. 476 // Async graphics pipeline was not ready.
449 return; 477 return;
450 } 478 }
451 479
452 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
453
454 const auto renderpass = pipeline->GetRenderPass();
455 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
456 scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
457
458 UpdateDynamicStates();
459
460 buffer_bindings.Bind(device, scheduler); 480 buffer_bindings.Bind(device, scheduler);
461 481
462 BeginTransformFeedback(); 482 BeginTransformFeedback();
463 483
484 scheduler.RequestRenderpass(framebuffer);
485 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
486 UpdateDynamicStates();
487
464 const auto pipeline_layout = pipeline->GetLayout(); 488 const auto pipeline_layout = pipeline->GetLayout();
465 const auto descriptor_set = pipeline->CommitDescriptorSet(); 489 const auto descriptor_set = pipeline->CommitDescriptorSet();
466 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 490 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
@@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() {
481 return; 505 return;
482 } 506 }
483 507
484 sampled_views.clear();
485 image_views.clear();
486
487 query_cache.UpdateCounters(); 508 query_cache.UpdateCounters();
488 509
489 const auto& regs = maxwell3d.regs; 510 const auto& regs = maxwell3d.regs;
@@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() {
495 return; 516 return;
496 } 517 }
497 518
498 [[maybe_unused]] const auto texceptions = UpdateAttachments(true); 519 auto lock = texture_cache.AcquireLock();
499 DEBUG_ASSERT(texceptions.none()); 520 texture_cache.UpdateRenderTargets(true);
500 SetupImageTransitions(0, color_attachments, zeta_attachment); 521 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
501 522 const VkExtent2D render_area = framebuffer->RenderArea();
502 const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); 523 scheduler.RequestRenderpass(framebuffer);
503 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
504 scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
505 524
506 VkClearRect clear_rect; 525 VkClearRect clear_rect{
507 clear_rect.baseArrayLayer = regs.clear_buffers.layer; 526 .rect = GetScissorState(regs, 0),
508 clear_rect.layerCount = 1; 527 .baseArrayLayer = regs.clear_buffers.layer,
509 clear_rect.rect = GetScissorState(regs, 0); 528 .layerCount = 1,
510 clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); 529 };
511 clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); 530 if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
531 return;
532 }
533 clear_rect.rect.extent = VkExtent2D{
534 .width = std::min(clear_rect.rect.extent.width, render_area.width),
535 .height = std::min(clear_rect.rect.extent.height, render_area.height),
536 };
512 537
513 if (use_color) { 538 if (use_color) {
514 VkClearValue clear_value; 539 VkClearValue clear_value;
@@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() {
549 574
550void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 575void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
551 MICROPROFILE_SCOPE(Vulkan_Compute); 576 MICROPROFILE_SCOPE(Vulkan_Compute);
552 update_descriptor_queue.Acquire();
553 sampled_views.clear();
554 image_views.clear();
555 577
556 query_cache.UpdateCounters(); 578 query_cache.UpdateCounters();
557 579
@@ -570,30 +592,46 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
570 // Compute dispatches can't be executed inside a renderpass 592 // Compute dispatches can't be executed inside a renderpass
571 scheduler.RequestOutsideRenderPassOperationContext(); 593 scheduler.RequestOutsideRenderPassOperationContext();
572 594
573 buffer_cache.Map(CalculateComputeStreamBufferSize()); 595 image_view_indices.clear();
596 sampler_handles.clear();
597
598 auto lock = texture_cache.AcquireLock();
599 texture_cache.SynchronizeComputeDescriptors();
574 600
575 const auto& entries = pipeline.GetEntries(); 601 const auto& entries = pipeline.GetEntries();
576 SetupComputeConstBuffers(entries);
577 SetupComputeGlobalBuffers(entries);
578 SetupComputeUniformTexels(entries); 602 SetupComputeUniformTexels(entries);
579 SetupComputeTextures(entries); 603 SetupComputeTextures(entries);
580 SetupComputeStorageTexels(entries); 604 SetupComputeStorageTexels(entries);
581 SetupComputeImages(entries); 605 SetupComputeImages(entries);
582 606
583 buffer_cache.Unmap(); 607 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
608 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
584 609
585 TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 610 buffer_cache.Map(CalculateComputeStreamBufferSize());
586 VK_ACCESS_SHADER_READ_BIT);
587 TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
588 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
589 611
612 update_descriptor_queue.Acquire();
613
614 SetupComputeConstBuffers(entries);
615 SetupComputeGlobalBuffers(entries);
616
617 ImageViewId* image_view_id_ptr = image_view_ids.data();
618 VkSampler* sampler_ptr = sampler_handles.data();
619 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
620 sampler_ptr);
621
622 buffer_cache.Unmap();
623
624 const VkPipeline pipeline_handle = pipeline.GetHandle();
625 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
626 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
590 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, 627 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
591 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), 628 grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
592 layout = pipeline.GetLayout(), 629 descriptor_set](vk::CommandBuffer cmdbuf) {
593 descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) {
594 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); 630 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
595 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, 631 if (descriptor_set) {
596 descriptor_set, {}); 632 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
633 DESCRIPTOR_SET, descriptor_set, nullptr);
634 }
597 cmdbuf.Dispatch(grid_x, grid_y, grid_z); 635 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
598 }); 636 });
599} 637}
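
The reworked DispatchCompute above splits descriptor preparation into phases: raw texture handles are gathered into image_view_indices, resolved in one batched FillComputeImageViews call, and only then written through the descriptor queue. A minimal sketch of that gather-then-batch-resolve idea, with a hypothetical FillViews standing in for the real texture cache API:

    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    using ImageViewId = std::uint32_t;

    // Hypothetical stand-in for the texture cache. Resolving every index in
    // one batch keeps per-descriptor map lookups out of the setup loops.
    void FillViews(std::span<const std::uint32_t> indices, std::span<ImageViewId> out) {
        for (std::size_t i = 0; i < indices.size(); ++i) {
            out[i] = indices[i] + 1000; // placeholder resolution
        }
    }

    int main() {
        std::vector<std::uint32_t> indices{3, 7, 9}; // phase 1: gather raw handles
        std::vector<ImageViewId> ids(indices.size());
        FillViews(indices, ids);                     // phase 2: one batched resolve
        for (const ImageViewId id : ids) {           // phase 3: push descriptors in order
            (void)id;
        }
    }
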
@@ -613,7 +651,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
613 if (addr == 0 || size == 0) { 651 if (addr == 0 || size == 0) {
614 return; 652 return;
615 } 653 }
616 texture_cache.FlushRegion(addr, size); 654 {
655 auto lock = texture_cache.AcquireLock();
656 texture_cache.DownloadMemory(addr, size);
657 }
617 buffer_cache.FlushRegion(addr, size); 658 buffer_cache.FlushRegion(addr, size);
618 query_cache.FlushRegion(addr, size); 659 query_cache.FlushRegion(addr, size);
619} 660}
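
FlushRegion here, and the invalidation paths below, now wrap the texture cache call in its own block so the cache lock is released before the buffer, pipeline, and query caches run. A minimal sketch of that scoping idiom, assuming a plain std::mutex rather than whatever primitive AcquireLock actually returns:

    #include <mutex>

    struct TextureCache {
        std::mutex mutex;
        [[nodiscard]] std::unique_lock<std::mutex> AcquireLock() {
            return std::unique_lock{mutex};
        }
        void DownloadMemory(/*VAddr addr, u64 size*/) {}
    };

    void Flush(TextureCache& texture_cache) {
        {
            // The lock guard dies at the closing brace, so the mutex is held
            // only for the texture cache work, not for the caches below.
            auto lock = texture_cache.AcquireLock();
            texture_cache.DownloadMemory();
        }
        // buffer_cache.FlushRegion(...);
        // query_cache.FlushRegion(...);
    }

    int main() {
        TextureCache cache;
        Flush(cache);
    }
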
@@ -622,14 +663,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
622 if (!Settings::IsGPULevelHigh()) { 663 if (!Settings::IsGPULevelHigh()) {
623 return buffer_cache.MustFlushRegion(addr, size); 664 return buffer_cache.MustFlushRegion(addr, size);
624 } 665 }
625 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); 666 return texture_cache.IsRegionGpuModified(addr, size) ||
667 buffer_cache.MustFlushRegion(addr, size);
626} 668}
627 669
628void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 670void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
629 if (addr == 0 || size == 0) { 671 if (addr == 0 || size == 0) {
630 return; 672 return;
631 } 673 }
632 texture_cache.InvalidateRegion(addr, size); 674 {
675 auto lock = texture_cache.AcquireLock();
676 texture_cache.WriteMemory(addr, size);
677 }
633 pipeline_cache.InvalidateRegion(addr, size); 678 pipeline_cache.InvalidateRegion(addr, size);
634 buffer_cache.InvalidateRegion(addr, size); 679 buffer_cache.InvalidateRegion(addr, size);
635 query_cache.InvalidateRegion(addr, size); 680 query_cache.InvalidateRegion(addr, size);
@@ -639,17 +684,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
639 if (addr == 0 || size == 0) { 684 if (addr == 0 || size == 0) {
640 return; 685 return;
641 } 686 }
642 texture_cache.OnCPUWrite(addr, size); 687 {
688 auto lock = texture_cache.AcquireLock();
689 texture_cache.WriteMemory(addr, size);
690 }
643 pipeline_cache.OnCPUWrite(addr, size); 691 pipeline_cache.OnCPUWrite(addr, size);
644 buffer_cache.OnCPUWrite(addr, size); 692 buffer_cache.OnCPUWrite(addr, size);
645} 693}
646 694
647void RasterizerVulkan::SyncGuestHost() { 695void RasterizerVulkan::SyncGuestHost() {
648 texture_cache.SyncGuestHost();
649 buffer_cache.SyncGuestHost(); 696 buffer_cache.SyncGuestHost();
650 pipeline_cache.SyncGuestHost(); 697 pipeline_cache.SyncGuestHost();
651} 698}
652 699
700void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
701 {
702 auto lock = texture_cache.AcquireLock();
703 texture_cache.UnmapMemory(addr, size);
704 }
705 buffer_cache.OnCPUWrite(addr, size);
706 pipeline_cache.OnCPUWrite(addr, size);
707}
708
653void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { 709void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
654 if (!gpu.IsAsync()) { 710 if (!gpu.IsAsync()) {
655 gpu_memory.Write<u32>(addr, value); 711 gpu_memory.Write<u32>(addr, value);
@@ -700,6 +756,14 @@ void RasterizerVulkan::WaitForIdle() {
700 }); 756 });
701} 757}
702 758
759void RasterizerVulkan::FragmentBarrier() {
760    // We already insert barriers when a render pass finishes
761}
762
763void RasterizerVulkan::TiledCacheBarrier() {
764 // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend
765}
766
703void RasterizerVulkan::FlushCommands() { 767void RasterizerVulkan::FlushCommands() {
704 if (draw_counter > 0) { 768 if (draw_counter > 0) {
705 draw_counter = 0; 769 draw_counter = 0;
@@ -710,14 +774,20 @@ void RasterizerVulkan::FlushCommands() {
710void RasterizerVulkan::TickFrame() { 774void RasterizerVulkan::TickFrame() {
711 draw_counter = 0; 775 draw_counter = 0;
712 update_descriptor_queue.TickFrame(); 776 update_descriptor_queue.TickFrame();
777 fence_manager.TickFrame();
713 buffer_cache.TickFrame(); 778 buffer_cache.TickFrame();
714 staging_pool.TickFrame(); 779 staging_pool.TickFrame();
780 {
781 auto lock = texture_cache.AcquireLock();
782 texture_cache.TickFrame();
783 }
715} 784}
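
TickFrame now also ticks the fence manager and, under its lock, the texture cache. The common idea behind these TickFrame calls is per-frame resource recycling; a sketch of that idea under the assumption of a fixed number of frames in flight (names hypothetical):

    #include <array>
    #include <cstddef>
    #include <vector>

    constexpr std::size_t NUM_FRAMES_IN_FLIGHT = 2;

    // Resources retired during frame N can only be reused once the GPU has
    // finished frame N, i.e. NUM_FRAMES_IN_FLIGHT ticks later.
    struct FramePool {
        std::array<std::vector<int>, NUM_FRAMES_IN_FLIGHT> retired;
        std::size_t frame = 0;

        void Retire(int resource) {
            retired[frame].push_back(resource);
        }
        void TickFrame() {
            frame = (frame + 1) % NUM_FRAMES_IN_FLIGHT;
            retired[frame].clear(); // the GPU is done with this slot; recycle it
        }
    };

    int main() {
        FramePool pool;
        pool.Retire(42);
        pool.TickFrame();
        pool.TickFrame(); // resource 42's slot is cleared here
    }
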
716 785
717bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 786bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
718 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 787 const Tegra::Engines::Fermi2D::Surface& dst,
719 const Tegra::Engines::Fermi2D::Config& copy_config) { 788 const Tegra::Engines::Fermi2D::Config& copy_config) {
720 texture_cache.DoFermiCopy(src, dst, copy_config); 789 auto lock = texture_cache.AcquireLock();
790 texture_cache.BlitImage(dst, src, copy_config);
721 return true; 791 return true;
722} 792}
723 793
@@ -727,20 +797,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
727 return false; 797 return false;
728 } 798 }
729 799
730 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; 800 auto lock = texture_cache.AcquireLock();
731 if (!surface) { 801 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
802 if (!image_view) {
732 return false; 803 return false;
733 } 804 }
734 805
735 // Verify that the cached surface is the same size and format as the requested framebuffer 806 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
736 const auto& params{surface->GetSurfaceParams()}; 807 screen_info.width = image_view->size.width;
737 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 808 screen_info.height = image_view->size.height;
738 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 809 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
739
740 screen_info.image = &surface->GetImage();
741 screen_info.width = params.width;
742 screen_info.height = params.height;
743 screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
744 return true; 810 return true;
745} 811}
746 812
@@ -765,103 +831,6 @@ void RasterizerVulkan::FlushWork() {
765 draw_counter = 0; 831 draw_counter = 0;
766} 832}
767 833
768RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
769 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
770
771 const auto& regs = maxwell3d.regs;
772 auto& dirty = maxwell3d.dirty.flags;
773 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
774 dirty[VideoCommon::Dirty::RenderTargets] = false;
775
776 texture_cache.GuardRenderTargets(true);
777
778 Texceptions texceptions;
779 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
780 if (update_rendertargets) {
781 const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
782 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
783 }
784 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
785 texceptions[rt] = true;
786 }
787 }
788
789 if (update_rendertargets) {
790 const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
791 zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
792 }
793 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
794 texceptions[ZETA_TEXCEPTION_INDEX] = true;
795 }
796
797 texture_cache.GuardRenderTargets(false);
798
799 return texceptions;
800}
801
802bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
803 bool overlap = false;
804 for (auto& [view, layout] : sampled_views) {
805 if (!attachment.IsSameSurface(*view)) {
806 continue;
807 }
808 overlap = true;
809 *layout = VK_IMAGE_LAYOUT_GENERAL;
810 }
811 return overlap;
812}
813
814std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
815 VkRenderPass renderpass) {
816 FramebufferCacheKey key{
817 .renderpass = renderpass,
818 .width = std::numeric_limits<u32>::max(),
819 .height = std::numeric_limits<u32>::max(),
820 .layers = std::numeric_limits<u32>::max(),
821 .views = {},
822 };
823
824 const auto try_push = [&key](const View& view) {
825 if (!view) {
826 return false;
827 }
828 key.views.push_back(view->GetAttachment());
829 key.width = std::min(key.width, view->GetWidth());
830 key.height = std::min(key.height, view->GetHeight());
831 key.layers = std::min(key.layers, view->GetNumLayers());
832 return true;
833 };
834
835 const auto& regs = maxwell3d.regs;
836 const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
837 for (std::size_t index = 0; index < num_attachments; ++index) {
838 if (try_push(color_attachments[index])) {
839 texture_cache.MarkColorBufferInUse(index);
840 }
841 }
842 if (try_push(zeta_attachment)) {
843 texture_cache.MarkDepthBufferInUse();
844 }
845
846 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
847 auto& framebuffer = fbentry->second;
848 if (is_cache_miss) {
849 framebuffer = device.GetLogical().CreateFramebuffer({
850 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
851 .pNext = nullptr,
852 .flags = 0,
853 .renderPass = key.renderpass,
854 .attachmentCount = static_cast<u32>(key.views.size()),
855 .pAttachments = key.views.data(),
856 .width = key.width,
857 .height = key.height,
858 .layers = key.layers,
859 });
860 }
861
862 return {*framebuffer, VkExtent2D{key.width, key.height}};
863}
864
865RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, 834RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
866 BufferBindings& buffer_bindings, 835 BufferBindings& buffer_bindings,
867 bool is_indexed, 836 bool is_indexed,
@@ -885,50 +854,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
885 854
886void RasterizerVulkan::SetupShaderDescriptors( 855void RasterizerVulkan::SetupShaderDescriptors(
887 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 856 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
888 texture_cache.GuardSamplers(true); 857 image_view_indices.clear();
889 858 sampler_handles.clear();
890 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 859 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
891 // Skip VertexA stage
892 Shader* const shader = shaders[stage + 1]; 860 Shader* const shader = shaders[stage + 1];
893 if (!shader) { 861 if (!shader) {
894 continue; 862 continue;
895 } 863 }
896 const auto& entries = shader->GetEntries(); 864 const auto& entries = shader->GetEntries();
897 SetupGraphicsConstBuffers(entries, stage);
898 SetupGraphicsGlobalBuffers(entries, stage);
899 SetupGraphicsUniformTexels(entries, stage); 865 SetupGraphicsUniformTexels(entries, stage);
900 SetupGraphicsTextures(entries, stage); 866 SetupGraphicsTextures(entries, stage);
901 SetupGraphicsStorageTexels(entries, stage); 867 SetupGraphicsStorageTexels(entries, stage);
902 SetupGraphicsImages(entries, stage); 868 SetupGraphicsImages(entries, stage);
903 } 869 }
904 texture_cache.GuardSamplers(false); 870 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
905} 871 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
906 872
907void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, 873 update_descriptor_queue.Acquire();
908 const ZetaAttachment& zeta) {
909 TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT);
910 TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
911 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
912 874
913 for (std::size_t rt = 0; rt < color.size(); ++rt) { 875 ImageViewId* image_view_id_ptr = image_view_ids.data();
914 const auto color_attachment = color[rt]; 876 VkSampler* sampler_ptr = sampler_handles.data();
915 if (color_attachment == nullptr) { 877 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
878 // Skip VertexA stage
879 Shader* const shader = shaders[stage + 1];
880 if (!shader) {
916 continue; 881 continue;
917 } 882 }
918 const auto image_layout = 883 const auto& entries = shader->GetEntries();
919 texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 884 SetupGraphicsConstBuffers(entries, stage);
920 color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 885 SetupGraphicsGlobalBuffers(entries, stage);
921 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 886 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
922 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); 887 sampler_ptr);
923 }
924
925 if (zeta != nullptr) {
926 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
927 ? VK_IMAGE_LAYOUT_GENERAL
928 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
929 zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
930 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
931 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
932 } 888 }
933} 889}
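
SetupShaderDescriptors now runs two passes over the shader list: the first collects image view indices and sampler handles, and after the batched FillGraphicsImageViews the second consumes image_view_ids through advancing pointers, so each PushImageDescriptors call continues where the previous stage stopped. A sketch of that cursor pattern, with a hypothetical PushStage in place of PushImageDescriptors:

    #include <array>
    #include <cstddef>

    // Hypothetical helper: consumes `count` resolved ids and leaves the cursor
    // on the next stage's first entry. Taking the pointer by reference is what
    // lets consecutive calls pick up where the previous one stopped.
    void PushStage(const int*& cursor, std::size_t count) {
        for (std::size_t i = 0; i < count; ++i) {
            (void)cursor[i]; // write one descriptor
        }
        cursor += count;
    }

    int main() {
        const std::array<int, 5> ids{10, 11, 20, 21, 22};
        const int* cursor = ids.data();
        PushStage(cursor, 2); // vertex stage consumes ids[0..1]
        PushStage(cursor, 3); // fragment stage consumes ids[2..4]
    }
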
934 890
@@ -1000,7 +956,7 @@ void RasterizerVulkan::EndTransformFeedback() {
1000void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { 956void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
1001 const auto& regs = maxwell3d.regs; 957 const auto& regs = maxwell3d.regs;
1002 958
1003 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 959 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
1004 const auto& vertex_array = regs.vertex_array[index]; 960 const auto& vertex_array = regs.vertex_array[index];
1005 if (!vertex_array.IsEnabled()) { 961 if (!vertex_array.IsEnabled()) {
1006 continue; 962 continue;
@@ -1009,7 +965,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
1009 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; 965 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1010 966
1011 ASSERT(end >= start); 967 ASSERT(end >= start);
1012 const std::size_t size = end - start; 968 const size_t size = end - start;
1013 if (size == 0) { 969 if (size == 0) {
1014 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); 970 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
1015 continue; 971 continue;
@@ -1070,7 +1026,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
1070 } 1026 }
1071} 1027}
1072 1028
1073void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { 1029void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
1074 MICROPROFILE_SCOPE(Vulkan_ConstBuffers); 1030 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1075 const auto& shader_stage = maxwell3d.state.shader_stages[stage]; 1031 const auto& shader_stage = maxwell3d.state.shader_stages[stage];
1076 for (const auto& entry : entries.const_buffers) { 1032 for (const auto& entry : entries.const_buffers) {
@@ -1078,7 +1034,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
1078 } 1034 }
1079} 1035}
1080 1036
1081void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { 1037void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
1082 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); 1038 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1083 const auto& cbufs{maxwell3d.state.shader_stages[stage]}; 1039 const auto& cbufs{maxwell3d.state.shader_stages[stage]};
1084 1040
@@ -1088,37 +1044,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
1088 } 1044 }
1089} 1045}
1090 1046
1091void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { 1047void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
1092 MICROPROFILE_SCOPE(Vulkan_Textures); 1048 MICROPROFILE_SCOPE(Vulkan_Textures);
1049 const auto& regs = maxwell3d.regs;
1050 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1093 for (const auto& entry : entries.uniform_texels) { 1051 for (const auto& entry : entries.uniform_texels) {
1094 const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; 1052 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1095 SetupUniformTexels(image, entry); 1053 image_view_indices.push_back(handle.image);
1096 } 1054 }
1097} 1055}
1098 1056
1099void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { 1057void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
1100 MICROPROFILE_SCOPE(Vulkan_Textures); 1058 MICROPROFILE_SCOPE(Vulkan_Textures);
1059 const auto& regs = maxwell3d.regs;
1060 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1101 for (const auto& entry : entries.samplers) { 1061 for (const auto& entry : entries.samplers) {
1102 for (std::size_t i = 0; i < entry.size; ++i) { 1062 for (size_t index = 0; index < entry.size; ++index) {
1103 const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); 1063 const TextureHandle handle =
1104 SetupTexture(texture, entry); 1064 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
1065 image_view_indices.push_back(handle.image);
1066
1067 Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
1068 sampler_handles.push_back(sampler->Handle());
1105 } 1069 }
1106 } 1070 }
1107} 1071}
1108 1072
1109void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { 1073void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
1110 MICROPROFILE_SCOPE(Vulkan_Textures); 1074 MICROPROFILE_SCOPE(Vulkan_Textures);
1075 const auto& regs = maxwell3d.regs;
1076 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1111 for (const auto& entry : entries.storage_texels) { 1077 for (const auto& entry : entries.storage_texels) {
1112 const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; 1078 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1113 SetupStorageTexel(image, entry); 1079 image_view_indices.push_back(handle.image);
1114 } 1080 }
1115} 1081}
1116 1082
1117void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { 1083void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
1118 MICROPROFILE_SCOPE(Vulkan_Images); 1084 MICROPROFILE_SCOPE(Vulkan_Images);
1085 const auto& regs = maxwell3d.regs;
1086 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1119 for (const auto& entry : entries.images) { 1087 for (const auto& entry : entries.images) {
1120 const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; 1088 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1121 SetupImage(tic, entry); 1089 image_view_indices.push_back(handle.image);
1122 } 1090 }
1123} 1091}
1124 1092
@@ -1128,11 +1096,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
1128 for (const auto& entry : entries.const_buffers) { 1096 for (const auto& entry : entries.const_buffers) {
1129 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 1097 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
1130 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 1098 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
1131 Tegra::Engines::ConstBufferInfo buffer; 1099 const Tegra::Engines::ConstBufferInfo info{
1132 buffer.address = config.Address(); 1100 .address = config.Address(),
1133 buffer.size = config.size; 1101 .size = config.size,
1134 buffer.enabled = mask[entry.GetIndex()]; 1102 .enabled = mask[entry.GetIndex()],
1135 SetupConstBuffer(entry, buffer); 1103 };
1104 SetupConstBuffer(entry, info);
1136 } 1105 }
1137} 1106}
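
The hunk above swaps a default-constructed, field-by-field ConstBufferInfo for a const object built with C++20 designated initializers. A before/after sketch of the difference (struct fields abbreviated):

    #include <cstdint>

    struct ConstBufferInfo {
        std::uint64_t address;
        std::uint64_t size;
        bool enabled;
    };

    // Before: a mutable object whose fields are assigned one by one; nothing
    // stops a later edit from forgetting a field and leaving it uninitialized.
    ConstBufferInfo MakeOldStyle() {
        ConstBufferInfo buffer;
        buffer.address = 0x1000;
        buffer.size = 256;
        buffer.enabled = true;
        return buffer;
    }

    // After: a const object initialized in one expression. Designated
    // initializers must follow declaration order, and any omitted field is
    // value-initialized instead of left indeterminate.
    ConstBufferInfo MakeNewStyle() {
        const ConstBufferInfo info{
            .address = 0x1000,
            .size = 256,
            .enabled = true,
        };
        return info;
    }
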
1138 1107
@@ -1147,35 +1116,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1147 1116
1148void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { 1117void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1149 MICROPROFILE_SCOPE(Vulkan_Textures); 1118 MICROPROFILE_SCOPE(Vulkan_Textures);
1119 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1150 for (const auto& entry : entries.uniform_texels) { 1120 for (const auto& entry : entries.uniform_texels) {
1151 const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1121 const TextureHandle handle =
1152 SetupUniformTexels(image, entry); 1122 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1123 image_view_indices.push_back(handle.image);
1153 } 1124 }
1154} 1125}
1155 1126
1156void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { 1127void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1157 MICROPROFILE_SCOPE(Vulkan_Textures); 1128 MICROPROFILE_SCOPE(Vulkan_Textures);
1129 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1158 for (const auto& entry : entries.samplers) { 1130 for (const auto& entry : entries.samplers) {
1159 for (std::size_t i = 0; i < entry.size; ++i) { 1131 for (size_t index = 0; index < entry.size; ++index) {
1160 const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); 1132 const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
1161 SetupTexture(texture, entry); 1133 COMPUTE_SHADER_INDEX, index);
1134 image_view_indices.push_back(handle.image);
1135
1136 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
1137 sampler_handles.push_back(sampler->Handle());
1162 } 1138 }
1163 } 1139 }
1164} 1140}
1165 1141
1166void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { 1142void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1167 MICROPROFILE_SCOPE(Vulkan_Textures); 1143 MICROPROFILE_SCOPE(Vulkan_Textures);
1144 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1168 for (const auto& entry : entries.storage_texels) { 1145 for (const auto& entry : entries.storage_texels) {
1169 const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1146 const TextureHandle handle =
1170 SetupStorageTexel(image, entry); 1147 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1148 image_view_indices.push_back(handle.image);
1171 } 1149 }
1172} 1150}
1173 1151
1174void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 1152void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1175 MICROPROFILE_SCOPE(Vulkan_Images); 1153 MICROPROFILE_SCOPE(Vulkan_Images);
1154 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1176 for (const auto& entry : entries.images) { 1155 for (const auto& entry : entries.images) {
1177 const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1156 const TextureHandle handle =
1178 SetupImage(tic, entry); 1157 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1158 image_view_indices.push_back(handle.image);
1179 } 1159 }
1180} 1160}
1181 1161
@@ -1186,14 +1166,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1186 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); 1166 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
1187 return; 1167 return;
1188 } 1168 }
1189
1190 // Align the size to avoid bad std140 interactions 1169 // Align the size to avoid bad std140 interactions
1191 const std::size_t size = 1170 const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1192 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1193 ASSERT(size <= MaxConstbufferSize); 1171 ASSERT(size <= MaxConstbufferSize);
1194 1172
1195 const auto info = 1173 const u64 alignment = device.GetUniformBufferAlignment();
1196 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); 1174 const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
1197 update_descriptor_queue.AddBuffer(info.handle, info.offset, size); 1175 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1198} 1176}
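
The alignment above rounds the upload size to a whole number of 16-byte vec4 slots, the std140 layout unit, so a trailing, partially used vec4 is still fully backed by the upload. A worked sketch with a hand-rolled AlignUp following the usual power-of-two formula (the real helper lives in yuzu's common library):

    #include <cstddef>

    // Round value up to the next multiple of align; align must be a power of two.
    constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
        return (value + align - 1) & ~(align - 1);
    }

    // std140 uniform blocks are laid out in 16-byte (vec4) units: a 36-byte
    // constant buffer is uploaded as 48 bytes, an already-aligned 64 stays 64.
    static_assert(AlignUp(36, 4 * sizeof(float)) == 48);
    static_assert(AlignUp(64, 4 * sizeof(float)) == 64);
    static_assert(AlignUp(0, 16) == 0);

    int main() {}
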
1199 1177
@@ -1206,7 +1184,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1206 // because Vulkan doesn't like empty buffers. 1184 // because Vulkan doesn't like empty buffers.
1207 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the 1185 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1208 // default buffer. 1186 // default buffer.
1209 static constexpr std::size_t dummy_size = 4; 1187 static constexpr size_t dummy_size = 4;
1210 const auto info = buffer_cache.GetEmptyBuffer(dummy_size); 1188 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1211 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); 1189 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1212 return; 1190 return;
@@ -1217,55 +1195,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1217 update_descriptor_queue.AddBuffer(info.handle, info.offset, size); 1195 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1218} 1196}
1219 1197
1220void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
1221 const UniformTexelEntry& entry) {
1222 const auto view = texture_cache.GetTextureSurface(tic, entry);
1223 ASSERT(view->IsBufferView());
1224
1225 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
1226}
1227
1228void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
1229 const SamplerEntry& entry) {
1230 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
1231 ASSERT(!view->IsBufferView());
1232
1233 const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
1234 texture.tic.z_source, texture.tic.w_source);
1235 const auto sampler = sampler_cache.GetSampler(texture.tsc);
1236 update_descriptor_queue.AddSampledImage(sampler, image_view);
1237
1238 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1239 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1240 sampled_views.push_back(ImageView{std::move(view), image_layout});
1241}
1242
1243void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
1244 const StorageTexelEntry& entry) {
1245 const auto view = texture_cache.GetImageSurface(tic, entry);
1246 ASSERT(view->IsBufferView());
1247
1248 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
1249}
1250
1251void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
1252 auto view = texture_cache.GetImageSurface(tic, entry);
1253
1254 if (entry.is_written) {
1255 view->MarkAsModified(texture_cache.Tick());
1256 }
1257
1258 UNIMPLEMENTED_IF(tic.IsBuffer());
1259
1260 const VkImageView image_view =
1261 view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1262 update_descriptor_queue.AddImage(image_view);
1263
1264 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1265 *image_layout = VK_IMAGE_LAYOUT_GENERAL;
1266 image_views.push_back(ImageView{std::move(view), image_layout});
1267}
1268
1269void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 1198void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
1270 if (!state_tracker.TouchViewports()) { 1199 if (!state_tracker.TouchViewports()) {
1271 return; 1200 return;
@@ -1457,8 +1386,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1457 }); 1386 });
1458} 1387}
1459 1388
1460std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { 1389size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1461 std::size_t size = CalculateVertexArraysSize(); 1390 size_t size = CalculateVertexArraysSize();
1462 if (is_indexed) { 1391 if (is_indexed) {
1463 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); 1392 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1464 } 1393 }
@@ -1466,15 +1395,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed)
1466 return size; 1395 return size;
1467} 1396}
1468 1397
1469std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { 1398size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1470 return Tegra::Engines::KeplerCompute::NumConstBuffers * 1399 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1471 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 1400 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1472} 1401}
1473 1402
1474std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { 1403size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1475 const auto& regs = maxwell3d.regs; 1404 const auto& regs = maxwell3d.regs;
1476 1405
1477 std::size_t size = 0; 1406 size_t size = 0;
1478 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 1407 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1479 // This implementation assumes that all attributes are used in the shader. 1408 // This implementation assumes that all attributes are used in the shader.
1480 const GPUVAddr start{regs.vertex_array[index].StartAddress()}; 1409 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
@@ -1486,12 +1415,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1486 return size; 1415 return size;
1487} 1416}
1488 1417
1489std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { 1418size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1490 return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * 1419 return static_cast<size_t>(maxwell3d.regs.index_array.count) *
1491 static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); 1420 static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
1492} 1421}
1493 1422
1494std::size_t RasterizerVulkan::CalculateConstBufferSize( 1423size_t RasterizerVulkan::CalculateConstBufferSize(
1495 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { 1424 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1496 if (entry.IsIndirect()) { 1425 if (entry.IsIndirect()) {
1497 // Buffer is accessed indirectly, so upload the entire thing 1426 // Buffer is accessed indirectly, so upload the entire thing
@@ -1502,37 +1431,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
1502 } 1431 }
1503} 1432}
1504 1433
1505RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
1506 const auto& regs = maxwell3d.regs;
1507 const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
1508
1509 RenderPassParams params;
1510 params.color_formats = {};
1511 std::size_t color_texceptions = 0;
1512
1513 std::size_t index = 0;
1514 for (std::size_t rt = 0; rt < num_attachments; ++rt) {
1515 const auto& rendertarget = regs.rt[rt];
1516 if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
1517 continue;
1518 }
1519 params.color_formats[index] = static_cast<u8>(rendertarget.format);
1520 color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index;
1521 ++index;
1522 }
1523 params.num_color_attachments = static_cast<u8>(index);
1524 params.texceptions = static_cast<u8>(color_texceptions);
1525
1526 params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0;
1527 params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
1528 return params;
1529}
1530
1531VkBuffer RasterizerVulkan::DefaultBuffer() { 1434VkBuffer RasterizerVulkan::DefaultBuffer() {
1532 if (default_buffer) { 1435 if (default_buffer) {
1533 return *default_buffer; 1436 return *default_buffer;
1534 } 1437 }
1535
1536 default_buffer = device.GetLogical().CreateBuffer({ 1438 default_buffer = device.GetLogical().CreateBuffer({
1537 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 1439 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1538 .pNext = nullptr, 1440 .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 30ec58eb4..4695718e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -11,11 +11,11 @@
11#include <vector> 11#include <vector>
12 12
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14#include <boost/functional/hash.hpp>
15 14
16#include "common/common_types.h" 15#include "common/common_types.h"
17#include "video_core/rasterizer_accelerated.h" 16#include "video_core/rasterizer_accelerated.h"
18#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/renderer_vulkan/blit_image.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h" 21#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -24,14 +24,13 @@
24#include "video_core/renderer_vulkan/vk_memory_manager.h" 24#include "video_core/renderer_vulkan/vk_memory_manager.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 25#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 26#include "video_core/renderer_vulkan/vk_query_cache.h"
27#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
28#include "video_core/renderer_vulkan/vk_sampler_cache.h"
29#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
30#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
29#include "video_core/renderer_vulkan/vk_stream_buffer.h"
31#include "video_core/renderer_vulkan/vk_texture_cache.h" 30#include "video_core/renderer_vulkan/vk_texture_cache.h"
32#include "video_core/renderer_vulkan/vk_update_descriptor.h" 31#include "video_core/renderer_vulkan/vk_update_descriptor.h"
33#include "video_core/renderer_vulkan/wrapper.h"
34#include "video_core/shader/async_shaders.h" 32#include "video_core/shader/async_shaders.h"
33#include "video_core/vulkan_common/vulkan_wrapper.h"
35 34
36namespace Core { 35namespace Core {
37class System; 36class System;
@@ -49,65 +48,14 @@ namespace Vulkan {
49 48
50struct VKScreenInfo; 49struct VKScreenInfo;
51 50
52using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>;
53
54struct FramebufferCacheKey {
55 VkRenderPass renderpass{};
56 u32 width = 0;
57 u32 height = 0;
58 u32 layers = 0;
59 ImageViewsPack views;
60
61 std::size_t Hash() const noexcept {
62 std::size_t hash = 0;
63 boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
64 for (const auto& view : views) {
65 boost::hash_combine(hash, static_cast<VkImageView>(view));
66 }
67 boost::hash_combine(hash, width);
68 boost::hash_combine(hash, height);
69 boost::hash_combine(hash, layers);
70 return hash;
71 }
72
73 bool operator==(const FramebufferCacheKey& rhs) const noexcept {
74 return std::tie(renderpass, views, width, height, layers) ==
75 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
76 }
77
78 bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
79 return !operator==(rhs);
80 }
81};
82
83} // namespace Vulkan
84
85namespace std {
86
87template <>
88struct hash<Vulkan::FramebufferCacheKey> {
89 std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
90 return k.Hash();
91 }
92};
93
94} // namespace std
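
The FramebufferCacheKey removed above follows the standard recipe for composite unordered_map keys: a Hash() that mixes every member, a std::hash specialization forwarding to it, and operator== via std::tie. A stripped-down sketch of the same recipe, with boost::hash_combine replaced by a hand-rolled mixer so the example is self-contained:

    #include <cstddef>
    #include <functional>
    #include <tuple>
    #include <unordered_map>

    struct Key {
        unsigned width = 0;
        unsigned height = 0;

        bool operator==(const Key& rhs) const noexcept {
            return std::tie(width, height) == std::tie(rhs.width, rhs.height);
        }
    };

    // Stand-in for boost::hash_combine: folds one value into a running seed.
    inline void HashCombine(std::size_t& seed, std::size_t value) {
        seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }

    template <>
    struct std::hash<Key> {
        std::size_t operator()(const Key& k) const noexcept {
            std::size_t seed = 0;
            HashCombine(seed, k.width);
            HashCombine(seed, k.height);
            return seed;
        }
    };

    int main() {
        std::unordered_map<Key, int> cache;
        cache.try_emplace(Key{.width = 1280, .height = 720}, 1);
    }
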
95
96namespace Vulkan {
97
98class StateTracker; 51class StateTracker;
99class BufferBindings; 52class BufferBindings;
100 53
101struct ImageView {
102 View view;
103 VkImageLayout* layout = nullptr;
104};
105
106class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 54class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
107public: 55public:
108 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 56 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
109 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 57 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
110 VKScreenInfo& screen_info_, const VKDevice& device_, 58 VKScreenInfo& screen_info_, const Device& device_,
111 VKMemoryManager& memory_manager_, StateTracker& state_tracker_, 59 VKMemoryManager& memory_manager_, StateTracker& state_tracker_,
112 VKScheduler& scheduler_); 60 VKScheduler& scheduler_);
113 ~RasterizerVulkan() override; 61 ~RasterizerVulkan() override;
@@ -123,15 +71,18 @@ public:
123 void InvalidateRegion(VAddr addr, u64 size) override; 71 void InvalidateRegion(VAddr addr, u64 size) override;
124 void OnCPUWrite(VAddr addr, u64 size) override; 72 void OnCPUWrite(VAddr addr, u64 size) override;
125 void SyncGuestHost() override; 73 void SyncGuestHost() override;
74 void UnmapMemory(VAddr addr, u64 size) override;
126 void SignalSemaphore(GPUVAddr addr, u32 value) override; 75 void SignalSemaphore(GPUVAddr addr, u32 value) override;
127 void SignalSyncPoint(u32 value) override; 76 void SignalSyncPoint(u32 value) override;
128 void ReleaseFences() override; 77 void ReleaseFences() override;
129 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 78 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
130 void WaitForIdle() override; 79 void WaitForIdle() override;
80 void FragmentBarrier() override;
81 void TiledCacheBarrier() override;
131 void FlushCommands() override; 82 void FlushCommands() override;
132 void TickFrame() override; 83 void TickFrame() override;
133 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 84 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
134 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 85 const Tegra::Engines::Fermi2D::Surface& dst,
135 const Tegra::Engines::Fermi2D::Config& copy_config) override; 86 const Tegra::Engines::Fermi2D::Config& copy_config) override;
136 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 87 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
137 u32 pixel_stride) override; 88 u32 pixel_stride) override;
@@ -145,11 +96,17 @@ public:
145 } 96 }
146 97
147 /// Maximum supported size that a constbuffer can have in bytes. 98 /// Maximum supported size that a constbuffer can have in bytes.
148 static constexpr std::size_t MaxConstbufferSize = 0x10000; 99 static constexpr size_t MaxConstbufferSize = 0x10000;
149 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, 100 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
150 "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); 101 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
151 102
152private: 103private:
104 static constexpr size_t MAX_TEXTURES = 192;
105 static constexpr size_t MAX_IMAGES = 48;
106 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
107
108 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
109
153 struct DrawParameters { 110 struct DrawParameters {
154 void Draw(vk::CommandBuffer cmdbuf) const; 111 void Draw(vk::CommandBuffer cmdbuf) const;
155 112
@@ -160,23 +117,8 @@ private:
160 bool is_indexed = 0; 117 bool is_indexed = 0;
161 }; 118 };
162 119
163 using ColorAttachments = std::array<View, Maxwell::NumRenderTargets>;
164 using ZetaAttachment = View;
165
166 using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
167
168 static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
169 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
170
171 void FlushWork(); 120 void FlushWork();
172 121
173 /// @brief Updates the currently bound attachments
174 /// @param is_clear True when the framebuffer is updated as a clear
175 /// @return Bitfield of attachments being used as sampled textures
176 Texceptions UpdateAttachments(bool is_clear);
177
178 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
179
180 /// Sets up geometry buffers and state. 122
181 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, 123 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
182 bool is_indexed, bool is_instanced); 124 bool is_indexed, bool is_instanced);
@@ -184,17 +126,12 @@ private:
184 /// Sets up descriptors in the graphics pipeline. 126
185 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); 127 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
186 128
187 void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color,
188 const ZetaAttachment& zeta);
189
190 void UpdateDynamicStates(); 129 void UpdateDynamicStates();
191 130
192 void BeginTransformFeedback(); 131 void BeginTransformFeedback();
193 132
194 void EndTransformFeedback(); 133 void EndTransformFeedback();
195 134
196 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
197
198 void SetupVertexArrays(BufferBindings& buffer_bindings); 135 void SetupVertexArrays(BufferBindings& buffer_bindings);
199 136
200 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); 137 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
@@ -240,14 +177,6 @@ private:
240 177
241 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); 178 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
242 179
243 void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
244
245 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
246
247 void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
248
249 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
250
251 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 180 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
252 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 181 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
253 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 182 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -264,18 +193,16 @@ private:
264 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 193 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
265 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 194 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
266 195
267 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; 196 size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
268
269 std::size_t CalculateComputeStreamBufferSize() const;
270 197
271 std::size_t CalculateVertexArraysSize() const; 198 size_t CalculateComputeStreamBufferSize() const;
272 199
273 std::size_t CalculateIndexBufferSize() const; 200 size_t CalculateVertexArraysSize() const;
274 201
275 std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, 202 size_t CalculateIndexBufferSize() const;
276 const Tegra::Engines::ConstBufferInfo& buffer) const;
277 203
278 RenderPassParams GetRenderPassParams(Texceptions texceptions) const; 204 size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
205 const Tegra::Engines::ConstBufferInfo& buffer) const;
279 206
280 VkBuffer DefaultBuffer(); 207 VkBuffer DefaultBuffer();
281 208
@@ -285,23 +212,24 @@ private:
285 Tegra::Engines::KeplerCompute& kepler_compute; 212 Tegra::Engines::KeplerCompute& kepler_compute;
286 213
287 VKScreenInfo& screen_info; 214 VKScreenInfo& screen_info;
288 const VKDevice& device; 215 const Device& device;
289 VKMemoryManager& memory_manager; 216 VKMemoryManager& memory_manager;
290 StateTracker& state_tracker; 217 StateTracker& state_tracker;
291 VKScheduler& scheduler; 218 VKScheduler& scheduler;
292 219
220 VKStreamBuffer stream_buffer;
293 VKStagingBufferPool staging_pool; 221 VKStagingBufferPool staging_pool;
294 VKDescriptorPool descriptor_pool; 222 VKDescriptorPool descriptor_pool;
295 VKUpdateDescriptorQueue update_descriptor_queue; 223 VKUpdateDescriptorQueue update_descriptor_queue;
296 VKRenderPassCache renderpass_cache; 224 BlitImageHelper blit_image;
297 QuadArrayPass quad_array_pass; 225 QuadArrayPass quad_array_pass;
298 QuadIndexedPass quad_indexed_pass; 226 QuadIndexedPass quad_indexed_pass;
299 Uint8Pass uint8_pass; 227 Uint8Pass uint8_pass;
300 228
301 VKTextureCache texture_cache; 229 TextureCacheRuntime texture_cache_runtime;
230 TextureCache texture_cache;
302 VKPipelineCache pipeline_cache; 231 VKPipelineCache pipeline_cache;
303 VKBufferCache buffer_cache; 232 VKBufferCache buffer_cache;
304 VKSamplerCache sampler_cache;
305 VKQueryCache query_cache; 233 VKQueryCache query_cache;
306 VKFenceManager fence_manager; 234 VKFenceManager fence_manager;
307 235
@@ -310,16 +238,11 @@ private:
310 vk::Event wfi_event; 238 vk::Event wfi_event;
311 VideoCommon::Shader::AsyncShaders async_shaders; 239 VideoCommon::Shader::AsyncShaders async_shaders;
312 240
313 ColorAttachments color_attachments; 241 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
314 ZetaAttachment zeta_attachment; 242 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
315 243 boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
316 std::vector<ImageView> sampled_views;
317 std::vector<ImageView> image_views;
318 244
319 u32 draw_counter = 0; 245 u32 draw_counter = 0;
320
321 // TODO(Rodrigo): Invalidate on image destruction
322 std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;
323}; 246};
324 247
325} // namespace Vulkan 248} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
deleted file mode 100644
index e812c7dd6..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <vector>
8
9#include "common/cityhash.h"
10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
14#include "video_core/renderer_vulkan/wrapper.h"
15
16namespace Vulkan {
17
18std::size_t RenderPassParams::Hash() const noexcept {
19 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
20 return static_cast<std::size_t>(hash);
21}
22
23bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
24 return std::memcmp(&rhs, this, sizeof *this) == 0;
25}
26
27VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {}
28
29VKRenderPassCache::~VKRenderPassCache() = default;
30
31VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
32 const auto [pair, is_cache_miss] = cache.try_emplace(params);
33 auto& entry = pair->second;
34 if (is_cache_miss) {
35 entry = CreateRenderPass(params);
36 }
37 return *entry;
38}
39
40vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
41 using namespace VideoCore::Surface;
42 const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
43
44 std::vector<VkAttachmentDescription> descriptors;
45 descriptors.reserve(num_attachments);
46
47 std::vector<VkAttachmentReference> color_references;
48 color_references.reserve(num_attachments);
49
50 for (std::size_t rt = 0; rt < num_attachments; ++rt) {
51 const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
52 const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
53 const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
54 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
55 static_cast<int>(pixel_format));
56
57 // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
58 const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
59 ? VK_IMAGE_LAYOUT_GENERAL
60 : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
61 descriptors.push_back({
62 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
63 .format = format.format,
64 .samples = VK_SAMPLE_COUNT_1_BIT,
65 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
66 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
67 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
68 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
69 .initialLayout = color_layout,
70 .finalLayout = color_layout,
71 });
72
73 color_references.push_back({
74 .attachment = static_cast<u32>(rt),
75 .layout = color_layout,
76 });
77 }
78
79 VkAttachmentReference zeta_attachment_ref;
80 const bool has_zeta = params.zeta_format != 0;
81 if (has_zeta) {
82 const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
83 const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
84 const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
85 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
86 static_cast<int>(pixel_format));
87
88 const VkImageLayout zeta_layout = params.zeta_texception != 0
89 ? VK_IMAGE_LAYOUT_GENERAL
90 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
91 descriptors.push_back({
92 .flags = 0,
93 .format = format.format,
94 .samples = VK_SAMPLE_COUNT_1_BIT,
95 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
96 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
97 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
98 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
99 .initialLayout = zeta_layout,
100 .finalLayout = zeta_layout,
101 });
102
103 zeta_attachment_ref = {
104 .attachment = static_cast<u32>(num_attachments),
105 .layout = zeta_layout,
106 };
107 }
108
109 const VkSubpassDescription subpass_description{
110 .flags = 0,
111 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
112 .inputAttachmentCount = 0,
113 .pInputAttachments = nullptr,
114 .colorAttachmentCount = static_cast<u32>(color_references.size()),
115 .pColorAttachments = color_references.data(),
116 .pResolveAttachments = nullptr,
117 .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
118 .preserveAttachmentCount = 0,
119 .pPreserveAttachments = nullptr,
120 };
121
122 VkAccessFlags access = 0;
123 VkPipelineStageFlags stage = 0;
124 if (!color_references.empty()) {
125 access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
126 stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
127 }
128
129 if (has_zeta) {
130 access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
131 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
132 stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
133 }
134
135 const VkSubpassDependency subpass_dependency{
136 .srcSubpass = VK_SUBPASS_EXTERNAL,
137 .dstSubpass = 0,
138 .srcStageMask = stage,
139 .dstStageMask = stage,
140 .srcAccessMask = 0,
141 .dstAccessMask = access,
142 .dependencyFlags = 0,
143 };
144
145 return device.GetLogical().CreateRenderPass({
146 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
147 .pNext = nullptr,
148 .flags = 0,
149 .attachmentCount = static_cast<u32>(descriptors.size()),
150 .pAttachments = descriptors.data(),
151 .subpassCount = 1,
152 .pSubpasses = &subpass_description,
153 .dependencyCount = 1,
154 .pDependencies = &subpass_dependency,
155 });
156}
157
158} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
deleted file mode 100644
index 652ecef7b..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ /dev/null
@@ -1,70 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <unordered_map>
9
10#include <boost/container/static_vector.hpp>
11#include <boost/functional/hash.hpp>
12
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/renderer_vulkan/wrapper.h"
15#include "video_core/surface.h"
16
17namespace Vulkan {
18
19class VKDevice;
20
21struct RenderPassParams {
22 std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
23 u8 num_color_attachments;
24 u8 texceptions;
25
26 u8 zeta_format;
27 u8 zeta_texception;
28
29 std::size_t Hash() const noexcept;
30
31 bool operator==(const RenderPassParams& rhs) const noexcept;
32
33 bool operator!=(const RenderPassParams& rhs) const noexcept {
34 return !operator==(rhs);
35 }
36};
37static_assert(std::has_unique_object_representations_v<RenderPassParams>);
38static_assert(std::is_trivially_copyable_v<RenderPassParams>);
39static_assert(std::is_trivially_constructible_v<RenderPassParams>);
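
Those three static_asserts are what made the byte-wise Hash() and operator== in the deleted .cpp safe: CityHash64 and memcmp read raw object bytes, so the struct must contain no padding or other bytes that do not participate in its value. A minimal sketch of the trap being guarded against (the padding layout assumes a typical ABI):

    #include <cstdint>
    #include <type_traits>

    struct Packed {
        std::uint8_t a;
        std::uint8_t b;
    };
    // Every byte is part of the value, so equal objects are byte-identical
    // and hashing the raw bytes is well defined.
    static_assert(std::has_unique_object_representations_v<Packed>);

    struct Padded {
        std::uint8_t a;
        // On a typical ABI, three padding bytes of indeterminate content sit here.
        std::uint32_t b;
    };
    // Two equal Padded values may differ in their padding bytes, so memcmp
    // equality and byte-wise hashing would be unreliable for this type.
    static_assert(!std::has_unique_object_representations_v<Padded>);

    int main() {}
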
40
41} // namespace Vulkan
42
43namespace std {
44
45template <>
46struct hash<Vulkan::RenderPassParams> {
47 std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
48 return k.Hash();
49 }
50};
51
52} // namespace std
53
54namespace Vulkan {
55
56class VKRenderPassCache final {
57public:
58 explicit VKRenderPassCache(const VKDevice& device_);
59 ~VKRenderPassCache();
60
61 VkRenderPass GetRenderPass(const RenderPassParams& params);
62
63private:
64 vk::RenderPass CreateRenderPass(const RenderPassParams& params) const;
65
66 const VKDevice& device;
67 std::unordered_map<RenderPassParams, vk::RenderPass> cache;
68};
69
70} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
deleted file mode 100644
index b859691fa..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <unordered_map>
6
7#include "video_core/renderer_vulkan/maxwell_to_vk.h"
8#include "video_core/renderer_vulkan/vk_sampler_cache.h"
9#include "video_core/renderer_vulkan/wrapper.h"
10#include "video_core/textures/texture.h"
11
12using Tegra::Texture::TextureMipmapFilter;
13
14namespace Vulkan {
15
16namespace {
17
18VkBorderColor ConvertBorderColor(std::array<float, 4> color) {
19 // TODO(Rodrigo): Manage integer border colors
20 if (color == std::array<float, 4>{0, 0, 0, 0}) {
21 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
22 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
23 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
24 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
25 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
26 }
27 if (color[0] + color[1] + color[2] > 1.35f) {
28 // If color elements are brighter than roughly 0.5 average, use white border
29 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
30 } else if (color[3] > 0.5f) {
31 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
32 } else {
33 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
34 }
35}
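
The fallback branch above approximates an arbitrary border with the nearest of the three float border colors core Vulkan offers: an RGB sum above 1.35 (an average of roughly 0.45 per channel) maps to opaque white, otherwise alpha picks between opaque and transparent black. A few worked cases against a copy of that decision tree:

    #include <array>
    #include <cassert>

    enum class Border { TransparentBlack, OpaqueBlack, OpaqueWhite };

    // Same decision tree as the fallback branch of ConvertBorderColor above.
    Border Classify(std::array<float, 4> color) {
        if (color[0] + color[1] + color[2] > 1.35f) {
            return Border::OpaqueWhite;
        }
        return color[3] > 0.5f ? Border::OpaqueBlack : Border::TransparentBlack;
    }

    int main() {
        assert(Classify({0.8f, 0.7f, 0.2f, 1.0f}) == Border::OpaqueWhite);      // RGB sum 1.7
        assert(Classify({0.1f, 0.1f, 0.1f, 0.9f}) == Border::OpaqueBlack);      // alpha 0.9
        assert(Classify({0.1f, 0.1f, 0.1f, 0.2f}) == Border::TransparentBlack); // alpha 0.2
    }
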
36
37} // Anonymous namespace
38
39VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {}
40
41VKSamplerCache::~VKSamplerCache() = default;
42
43vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
44 const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
45 const std::array color = tsc.GetBorderColor();
46
47 VkSamplerCustomBorderColorCreateInfoEXT border{
48 .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
49 .pNext = nullptr,
50 .customBorderColor = {},
51 .format = VK_FORMAT_UNDEFINED,
52 };
53 std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
54
55 return device.GetLogical().CreateSampler({
56 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
57 .pNext = arbitrary_borders ? &border : nullptr,
58 .flags = 0,
59 .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
60 .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
61 .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
62 .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
63 .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
64 .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
65 .mipLodBias = tsc.GetLodBias(),
66 .anisotropyEnable =
67 static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
68 .maxAnisotropy = tsc.GetMaxAnisotropy(),
69 .compareEnable = tsc.depth_compare_enabled,
70 .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
71 .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
72 .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
73 .borderColor =
74 arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
75 .unnormalizedCoordinates = VK_FALSE,
76 });
77}
78
79VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
80 return *sampler;
81}
82
83} // namespace Vulkan
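The file-local ConvertBorderColor above is the fallback for devices without VK_EXT_custom_border_color: exact canonical colors take the early returns, and everything else snaps to the nearest of Vulkan's three fixed border colors by brightness and alpha. Worked examples, assuming the definition above is in scope:

    #include <array>
    #include <cassert>

    void BorderColorExamples() {
        using Color = std::array<float, 4>;
        // An exact canonical color takes one of the early returns.
        assert(ConvertBorderColor(Color{0, 0, 0, 1}) == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
        // 0.6 + 0.6 + 0.6 = 1.8 > 1.35 (brighter than ~0.5 average): white.
        assert(ConvertBorderColor(Color{0.6f, 0.6f, 0.6f, 1.0f}) == VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE);
        // Dark but mostly opaque (alpha > 0.5): opaque black.
        assert(ConvertBorderColor(Color{0.2f, 0.2f, 0.2f, 0.9f}) == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
        // Dark and mostly transparent: transparent black.
        assert(ConvertBorderColor(Color{0.1f, 0.1f, 0.1f, 0.2f}) == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK);
    }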
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
deleted file mode 100644
index 3f22c4610..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ /dev/null
@@ -1,29 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/renderer_vulkan/wrapper.h"
8#include "video_core/sampler_cache.h"
9#include "video_core/textures/texture.h"
10
11namespace Vulkan {
12
13class VKDevice;
14
15class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> {
16public:
17 explicit VKSamplerCache(const VKDevice& device_);
18 ~VKSamplerCache();
19
20protected:
21 vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
22
23 VkSampler ToSamplerType(const vk::Sampler& sampler) const override;
24
25private:
26 const VKDevice& device;
27};
28
29} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 1a483dc71..66004f9c0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -11,12 +11,13 @@
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/thread.h" 12#include "common/thread.h"
13#include "video_core/renderer_vulkan/vk_command_pool.h" 13#include "video_core/renderer_vulkan/vk_command_pool.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_master_semaphore.h" 14#include "video_core/renderer_vulkan/vk_master_semaphore.h"
16#include "video_core/renderer_vulkan/vk_query_cache.h" 15#include "video_core/renderer_vulkan/vk_query_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 16#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_state_tracker.h" 17#include "video_core/renderer_vulkan/vk_state_tracker.h"
19#include "video_core/renderer_vulkan/wrapper.h" 18#include "video_core/renderer_vulkan/vk_texture_cache.h"
19#include "video_core/vulkan_common/vulkan_device.h"
20#include "video_core/vulkan_common/vulkan_wrapper.h"
20 21
21namespace Vulkan { 22namespace Vulkan {
22 23
@@ -36,7 +37,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
36 last = nullptr; 37 last = nullptr;
37} 38}
38 39
39VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_) 40VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
40 : device{device_}, state_tracker{state_tracker_}, 41 : device{device_}, state_tracker{state_tracker_},
41 master_semaphore{std::make_unique<MasterSemaphore>(device)}, 42 master_semaphore{std::make_unique<MasterSemaphore>(device)},
42 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { 43 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
@@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() {
96 AcquireNewChunk(); 97 AcquireNewChunk();
97} 98}
98 99
99void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, 100void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
100 VkExtent2D render_area) { 101 const VkRenderPass renderpass = framebuffer->RenderPass();
101 if (renderpass == state.renderpass && framebuffer == state.framebuffer && 102 const VkFramebuffer framebuffer_handle = framebuffer->Handle();
103 const VkExtent2D render_area = framebuffer->RenderArea();
104 if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
102 render_area.width == state.render_area.width && 105 render_area.width == state.render_area.width &&
103 render_area.height == state.render_area.height) { 106 render_area.height == state.render_area.height) {
104 return; 107 return;
105 } 108 }
106 const bool end_renderpass = state.renderpass != nullptr; 109 EndRenderPass();
107 state.renderpass = renderpass; 110 state.renderpass = renderpass;
108 state.framebuffer = framebuffer; 111 state.framebuffer = framebuffer_handle;
109 state.render_area = render_area; 112 state.render_area = render_area;
110 113
111 const VkRenderPassBeginInfo renderpass_bi{ 114 Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
112 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 115 const VkRenderPassBeginInfo renderpass_bi{
113 .pNext = nullptr, 116 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
114 .renderPass = renderpass, 117 .pNext = nullptr,
115 .framebuffer = framebuffer, 118 .renderPass = renderpass,
116 .renderArea = 119 .framebuffer = framebuffer_handle,
117 { 120 .renderArea =
118 .offset = {.x = 0, .y = 0}, 121 {
119 .extent = render_area, 122 .offset = {.x = 0, .y = 0},
120 }, 123 .extent = render_area,
121 .clearValueCount = 0, 124 },
122 .pClearValues = nullptr, 125 .clearValueCount = 0,
123 }; 126 .pClearValues = nullptr,
124 127 };
125 Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
126 if (end_renderpass) {
127 cmdbuf.EndRenderPass();
128 }
129 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 128 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
130 }); 129 });
130 num_renderpass_images = framebuffer->NumImages();
131 renderpass_images = framebuffer->Images();
132 renderpass_image_ranges = framebuffer->ImageRanges();
131} 133}
132 134
133void VKScheduler::RequestOutsideRenderPassOperationContext() { 135void VKScheduler::RequestOutsideRenderPassOperationContext() {
@@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() {
241 if (!state.renderpass) { 243 if (!state.renderpass) {
242 return; 244 return;
243 } 245 }
246 Record([num_images = num_renderpass_images, images = renderpass_images,
247 ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
248 std::array<VkImageMemoryBarrier, 9> barriers;
249 for (size_t i = 0; i < num_images; ++i) {
250 barriers[i] = VkImageMemoryBarrier{
251 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
252 .pNext = nullptr,
253 .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
254 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
255 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
256 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
257 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
258 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
259 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
260 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
261 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
262 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
263 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
264 .image = images[i],
265 .subresourceRange = ranges[i],
266 };
267 }
268 cmdbuf.EndRenderPass();
269 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
270 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
271 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
272 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
273 vk::Span(barriers.data(), num_images));
274 });
244 state.renderpass = nullptr; 275 state.renderpass = nullptr;
245 Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); 276 num_renderpass_images = 0;
246} 277}
247 278
248void VKScheduler::AcquireNewChunk() { 279void VKScheduler::AcquireNewChunk() {
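The rewritten RequestRenderpass and EndRenderPass both lean on the scheduler's core contract: everything a command needs is captured by value into the lambda passed to Record, because the lambda runs later on the worker thread, long after the caller's stack frame is gone. That is also why the new EndRenderPass copies num_renderpass_images and both arrays into the closure instead of referencing the scheduler's members. A minimal sketch of the contract (assuming the vk::CommandBuffer wrapper forwards BindPipeline to vkCmdBindPipeline, as BindGraphicsPipeline does):

    void BindExample(VKScheduler& scheduler, VkPipeline pipeline) {
        // Capture by value only: VkPipeline is a plain handle, so the
        // deferred execution never dereferences dead stack state.
        scheduler.Record([pipeline](vk::CommandBuffer cmdbuf) {
            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
        });
    }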
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 6d3a5da0b..4cd43e425 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -12,21 +12,22 @@
12#include <utility> 12#include <utility>
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "common/threadsafe_queue.h" 14#include "common/threadsafe_queue.h"
15#include "video_core/renderer_vulkan/wrapper.h" 15#include "video_core/vulkan_common/vulkan_wrapper.h"
16 16
17namespace Vulkan { 17namespace Vulkan {
18 18
19class CommandPool; 19class CommandPool;
20class Device;
21class Framebuffer;
20class MasterSemaphore; 22class MasterSemaphore;
21class StateTracker; 23class StateTracker;
22class VKDevice;
23class VKQueryCache; 24class VKQueryCache;
24 25
25/// The scheduler abstracts command buffer and fence management with an interface that's able to do 26/// The scheduler abstracts command buffer and fence management with an interface that's able to do
26/// OpenGL-like operations on Vulkan command buffers. 27/// OpenGL-like operations on Vulkan command buffers.
27class VKScheduler { 28class VKScheduler {
28public: 29public:
29 explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker); 30 explicit VKScheduler(const Device& device, StateTracker& state_tracker);
30 ~VKScheduler(); 31 ~VKScheduler();
31 32
32 /// Returns the current command buffer tick. 33 /// Returns the current command buffer tick.
@@ -52,8 +53,7 @@ public:
52 void DispatchWork(); 53 void DispatchWork();
53 54
54 /// Requests to begin a renderpass. 55 /// Requests to begin a renderpass.
55 void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, 56 void RequestRenderpass(const Framebuffer* framebuffer);
56 VkExtent2D render_area);
57 57
58 /// Requests the current execution context to be able to execute operations only allowed outside 58 /// Requests the current execution context to be able to execute operations only allowed outside
59 /// of a renderpass. 59 /// of a renderpass.
@@ -62,6 +62,9 @@ public:
62 /// Binds a pipeline to the current execution context. 62 /// Binds a pipeline to the current execution context.
63 void BindGraphicsPipeline(VkPipeline pipeline); 63 void BindGraphicsPipeline(VkPipeline pipeline);
64 64
65 /// Invalidates current command buffer state except for render passes
66 void InvalidateState();
67
65 /// Assigns the query cache. 68 /// Assigns the query cache.
66 void SetQueryCache(VKQueryCache& query_cache_) { 69 void SetQueryCache(VKQueryCache& query_cache_) {
67 query_cache = &query_cache_; 70 query_cache = &query_cache_;
@@ -170,15 +173,13 @@ private:
170 173
171 void AllocateNewContext(); 174 void AllocateNewContext();
172 175
173 void InvalidateState();
174
175 void EndPendingOperations(); 176 void EndPendingOperations();
176 177
177 void EndRenderPass(); 178 void EndRenderPass();
178 179
179 void AcquireNewChunk(); 180 void AcquireNewChunk();
180 181
181 const VKDevice& device; 182 const Device& device;
182 StateTracker& state_tracker; 183 StateTracker& state_tracker;
183 184
184 std::unique_ptr<MasterSemaphore> master_semaphore; 185 std::unique_ptr<MasterSemaphore> master_semaphore;
@@ -192,6 +193,11 @@ private:
192 std::thread worker_thread; 193 std::thread worker_thread;
193 194
194 State state; 195 State state;
196
197 u32 num_renderpass_images = 0;
198 std::array<VkImage, 9> renderpass_images{};
199 std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
200
195 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; 201 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
196 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; 202 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
197 std::mutex mutex; 203 std::mutex mutex;
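The new members are sized 9 because a Maxwell render pass can reference at most 8 color render targets plus one depth/stencil attachment; the same bound reappears as the 9-entry ATTACHMENT_REFERENCES table in the new texture cache. As a sketch (hypothetical named constants; the code uses the literal 9):

    constexpr std::size_t NUM_COLOR_RTS = 8; // Maxwell3D::Regs::NumRenderTargets
    constexpr std::size_t MAX_RENDERPASS_IMAGES = NUM_COLOR_RTS + 1; // + depth/stencil
    static_assert(MAX_RENDERPASS_IMAGES == 9);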
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 72954d0e3..89cbe01ad 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -22,11 +22,11 @@
22#include "video_core/engines/shader_bytecode.h" 22#include "video_core/engines/shader_bytecode.h"
23#include "video_core/engines/shader_header.h" 23#include "video_core/engines/shader_header.h"
24#include "video_core/engines/shader_type.h" 24#include "video_core/engines/shader_type.h"
25#include "video_core/renderer_vulkan/vk_device.h"
26#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 25#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
27#include "video_core/shader/node.h" 26#include "video_core/shader/node.h"
28#include "video_core/shader/shader_ir.h" 27#include "video_core/shader/shader_ir.h"
29#include "video_core/shader/transform_feedback.h" 28#include "video_core/shader/transform_feedback.h"
29#include "video_core/vulkan_common/vulkan_device.h"
30 30
31namespace Vulkan { 31namespace Vulkan {
32 32
@@ -102,7 +102,7 @@ struct GenericVaryingDescription {
102 bool is_scalar = false; 102 bool is_scalar = false;
103}; 103};
104 104
105spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
106 ASSERT(!sampler.is_buffer); 106 ASSERT(!sampler.is_buffer);
107 switch (sampler.type) { 107 switch (sampler.type) {
108 case Tegra::Shader::TextureType::Texture1D: 108 case Tegra::Shader::TextureType::Texture1D:
@@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) {
119 } 119 }
120} 120}
121 121
122std::pair<spv::Dim, bool> GetImageDim(const Image& image) { 122std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
123 switch (image.type) { 123 switch (image.type) {
124 case Tegra::Shader::ImageType::Texture1D: 124 case Tegra::Shader::ImageType::Texture1D:
125 return {spv::Dim::Dim1D, false}; 125 return {spv::Dim::Dim1D, false};
@@ -272,19 +272,12 @@ bool IsPrecise(Operation operand) {
272 return false; 272 return false;
273} 273}
274 274
275u32 ShaderVersion(const VKDevice& device) {
276 if (device.InstanceApiVersion() < VK_API_VERSION_1_1) {
277 return 0x00010000;
278 }
279 return 0x00010300;
280}
281
282class SPIRVDecompiler final : public Sirit::Module { 275class SPIRVDecompiler final : public Sirit::Module {
283public: 276public:
284 explicit SPIRVDecompiler(const VKDevice& device_, const ShaderIR& ir_, ShaderType stage_, 277 explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
285 const Registry& registry_, const Specialization& specialization_) 278 const Registry& registry_, const Specialization& specialization_)
286 : Module(ShaderVersion(device_)), device{device_}, ir{ir_}, stage{stage_}, 279 : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
287 header{ir_.GetHeader()}, registry{registry_}, specialization{specialization_} { 280 registry{registry_}, specialization{specialization_} {
288 if (stage_ != ShaderType::Compute) { 281 if (stage_ != ShaderType::Compute) {
289 transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); 282 transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
290 } 283 }
@@ -980,7 +973,7 @@ private:
980 return binding; 973 return binding;
981 } 974 }
982 975
983 void DeclareImage(const Image& image, u32& binding) { 976 void DeclareImage(const ImageEntry& image, u32& binding) {
984 const auto [dim, arrayed] = GetImageDim(image); 977 const auto [dim, arrayed] = GetImageDim(image);
985 constexpr int depth = 0; 978 constexpr int depth = 0;
986 constexpr bool ms = false; 979 constexpr bool ms = false;
@@ -2749,7 +2742,7 @@ private:
2749 }; 2742 };
2750 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2743 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2751 2744
2752 const VKDevice& device; 2745 const Device& device;
2753 const ShaderIR& ir; 2746 const ShaderIR& ir;
2754 const ShaderType stage; 2747 const ShaderType stage;
2755 const Tegra::Shader::Header header; 2748 const Tegra::Shader::Header header;
@@ -3137,7 +3130,7 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3137 return entries; 3130 return entries;
3138} 3131}
3139 3132
3140std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 3133std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
3141 ShaderType stage, const VideoCommon::Shader::Registry& registry, 3134 ShaderType stage, const VideoCommon::Shader::Registry& registry,
3142 const Specialization& specialization) { 3135 const Specialization& specialization) {
3143 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); 3136 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
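The literal 0x00010300 handed to Module() is a SPIR-V version word: the major version sits in bits 16..23 and the minor version in bits 8..15, so it encodes SPIR-V 1.3. The removed ShaderVersion() helper downgraded to 0x00010000 (SPIR-V 1.0) on Vulkan instances older than 1.1; the decompiler now targets 1.3 unconditionally:

    #include <cstdint>

    constexpr std::uint32_t MakeSpirvVersion(std::uint32_t major, std::uint32_t minor) {
        return (major << 16) | (minor << 8);
    }
    static_assert(MakeSpirvVersion(1, 3) == 0x00010300); // the new fixed target
    static_assert(MakeSpirvVersion(1, 0) == 0x00010000); // the removed fallback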
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index df1812514..26381e444 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,16 +15,14 @@
15#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
16 16
17namespace Vulkan { 17namespace Vulkan {
18class VKDevice;
19}
20 18
21namespace Vulkan { 19class Device;
22 20
23using Maxwell = Tegra::Engines::Maxwell3D::Regs; 21using Maxwell = Tegra::Engines::Maxwell3D::Regs;
24using UniformTexelEntry = VideoCommon::Shader::Sampler; 22using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
25using SamplerEntry = VideoCommon::Shader::Sampler; 23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
26using StorageTexelEntry = VideoCommon::Shader::Image; 24using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
27using ImageEntry = VideoCommon::Shader::Image; 25using ImageEntry = VideoCommon::Shader::ImageEntry;
28 26
29constexpr u32 DESCRIPTOR_SET = 0; 27constexpr u32 DESCRIPTOR_SET = 0;
30 28
@@ -109,7 +107,7 @@ struct SPIRVShader {
109 107
110ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); 108ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
111 109
112std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 110std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
113 Tegra::Engines::ShaderType stage, 111 Tegra::Engines::ShaderType stage,
114 const VideoCommon::Shader::Registry& registry, 112 const VideoCommon::Shader::Registry& registry,
115 const Specialization& specialization); 113 const Specialization& specialization);
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index c1a218d76..aaad4f292 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -7,24 +7,19 @@
7 7
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/vk_device.h"
11#include "video_core/renderer_vulkan/vk_shader_util.h" 10#include "video_core/renderer_vulkan/vk_shader_util.h"
12#include "video_core/renderer_vulkan/wrapper.h" 11#include "video_core/vulkan_common/vulkan_device.h"
12#include "video_core/vulkan_common/vulkan_wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
16vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { 16vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
17 // Avoid undefined behavior by copying to a staging allocation
18 ASSERT(code_size % sizeof(u32) == 0);
19 const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
20 std::memcpy(data.get(), code_data, code_size);
21
22 return device.GetLogical().CreateShaderModule({ 17 return device.GetLogical().CreateShaderModule({
23 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 18 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
24 .pNext = nullptr, 19 .pNext = nullptr,
25 .flags = 0, 20 .flags = 0,
26 .codeSize = code_size, 21 .codeSize = static_cast<u32>(code.size_bytes()),
27 .pCode = data.get(), 22 .pCode = code.data(),
28 }); 23 });
29} 24}
30 25
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
index d1d3f3cae..9517cbe84 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -4,13 +4,15 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <span>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
9 11
10namespace Vulkan { 12namespace Vulkan {
11 13
12class VKDevice; 14class Device;
13 15
14vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); 16vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
15 17
16} // namespace Vulkan 18} // namespace Vulkan
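The old BuildShader accepted raw bytes and copied them into a fresh u32 allocation to satisfy pCode's alignment requirement; the std::span<const u32> overload moves that requirement into the caller's type, and size_bytes() keeps codeSize in bytes as Vulkan expects while pCode stays word-typed. A hypothetical call site, assuming the SPIR-V already lives in 32-bit words (e.g. the output of Decompile) and yuzu's u32 alias is in scope:

    #include <vector>

    vk::ShaderModule MakeModule(const Device& device, const std::vector<u32>& spirv) {
        return BuildShader(device, spirv); // std::vector converts to std::span<const u32>
    }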
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 2fd3b7f39..1e0b8b922 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -9,17 +9,17 @@
9 9
10#include "common/bit_util.h" 10#include "common/bit_util.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_scheduler.h" 12#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 13#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
15#include "video_core/renderer_vulkan/wrapper.h" 14#include "video_core/vulkan_common/vulkan_device.h"
15#include "video_core/vulkan_common/vulkan_wrapper.h"
16 16
17namespace Vulkan { 17namespace Vulkan {
18 18
19VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_) 19VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_)
20 : buffer{std::move(buffer_)} {} 20 : buffer{std::move(buffer_)} {}
21 21
22VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_, 22VKStagingBufferPool::VKStagingBufferPool(const Device& device_, VKMemoryManager& memory_manager_,
23 VKScheduler& scheduler_) 23 VKScheduler& scheduler_)
24 : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} 24 : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {}
25 25
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 2dd5049ac..90dadcbbe 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -10,11 +10,11 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11 11
12#include "video_core/renderer_vulkan/vk_memory_manager.h" 12#include "video_core/renderer_vulkan/vk_memory_manager.h"
13#include "video_core/renderer_vulkan/wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
14 14
15namespace Vulkan { 15namespace Vulkan {
16 16
17class VKDevice; 17class Device;
18class VKScheduler; 18class VKScheduler;
19 19
20struct VKBuffer final { 20struct VKBuffer final {
@@ -24,7 +24,7 @@ struct VKBuffer final {
24 24
25class VKStagingBufferPool final { 25class VKStagingBufferPool final {
26public: 26public:
27 explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, 27 explicit VKStagingBufferPool(const Device& device, VKMemoryManager& memory_manager,
28 VKScheduler& scheduler); 28 VKScheduler& scheduler);
29 ~VKStagingBufferPool(); 29 ~VKStagingBufferPool();
30 30
@@ -58,7 +58,7 @@ private:
58 58
59 u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2); 59 u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
60 60
61 const VKDevice& device; 61 const Device& device;
62 VKMemoryManager& memory_manager; 62 VKMemoryManager& memory_manager;
63 VKScheduler& scheduler; 63 VKScheduler& scheduler;
64 64
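The ReleaseLevel(cache, log2) signature hints at how the pool organizes reuse: one bucket per power-of-two size class, so a staging buffer allocated for one request can serve any later request that rounds up to the same class. A speculative sketch of that bucketing (hypothetical helper; yuzu's Common::Log2Ceil utilities in common/bit_util.h fill this role):

    #include <bit>
    #include <cstddef>

    // Size class for a request of `bytes` (bytes > 0): ceil(log2(bytes)).
    std::size_t SizeClass(std::size_t bytes) {
        return std::bit_width(bytes - 1);
    }
    // SizeClass(4096) == 12 (4 KiB bucket), SizeClass(4097) == 13 (8 KiB bucket).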
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 50164cc08..1779a2e30 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cstddef> 7#include <cstddef>
7#include <iterator> 8#include <iterator>
8 9
@@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table;
29using Flags = Maxwell3D::DirtyState::Flags; 30using Flags = Maxwell3D::DirtyState::Flags;
30 31
31Flags MakeInvalidationFlags() { 32Flags MakeInvalidationFlags() {
33 static constexpr std::array INVALIDATION_FLAGS{
34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
37 };
32 Flags flags{}; 38 Flags flags{};
33 flags[Viewports] = true; 39 for (const int flag : INVALIDATION_FLAGS) {
34 flags[Scissors] = true; 40 flags[flag] = true;
35 flags[DepthBias] = true; 41 }
36 flags[BlendConstants] = true;
37 flags[DepthBounds] = true;
38 flags[StencilProperties] = true;
39 flags[CullMode] = true;
40 flags[DepthBoundsEnable] = true;
41 flags[DepthTestEnable] = true;
42 flags[DepthWriteEnable] = true;
43 flags[DepthCompareOp] = true;
44 flags[FrontFace] = true;
45 flags[StencilOp] = true;
46 flags[StencilTestEnable] = true;
47 return flags; 42 return flags;
48} 43}
49 44
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 1de789e57..c335d2bdf 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -52,6 +52,14 @@ public:
52 current_topology = INVALID_TOPOLOGY; 52 current_topology = INVALID_TOPOLOGY;
53 } 53 }
54 54
55 void InvalidateViewports() {
56 flags[Dirty::Viewports] = true;
57 }
58
59 void InvalidateScissors() {
60 flags[Dirty::Scissors] = true;
61 }
62
55 bool TouchViewports() { 63 bool TouchViewports() {
56 return Exchange(Dirty::Viewports, false); 64 return Exchange(Dirty::Viewports, false);
57 } 65 }
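The new Invalidate* setters pair with the existing Touch* getters: invalidation marks a flag dirty, and Touch exchanges it back to false, so each consumer re-records that piece of state at most once per invalidation. A sketch of the intended consumer-side pattern (hypothetical SyncViewports; the real consumer is the rasterizer):

    void SyncViewports(StateTracker& state_tracker) {
        if (!state_tracker.TouchViewports()) {
            return; // Nothing has invalidated the viewports since the last sync
        }
        // Re-record vkCmdSetViewport state here, exactly once per invalidation.
    }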
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 1b59612b9..a09fe084e 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -10,15 +10,19 @@
10 10
11#include "common/alignment.h" 11#include "common/alignment.h"
12#include "common/assert.h" 12#include "common/assert.h"
13#include "video_core/renderer_vulkan/vk_device.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h" 13#include "video_core/renderer_vulkan/vk_scheduler.h"
15#include "video_core/renderer_vulkan/vk_stream_buffer.h" 14#include "video_core/renderer_vulkan/vk_stream_buffer.h"
16#include "video_core/renderer_vulkan/wrapper.h" 15#include "video_core/vulkan_common/vulkan_device.h"
16#include "video_core/vulkan_common/vulkan_wrapper.h"
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
20namespace { 20namespace {
21 21
22constexpr VkBufferUsageFlags BUFFER_USAGE =
23 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
24 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
25
22constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; 26constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
23constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; 27constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
24 28
@@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
56 60
57} // Anonymous namespace 61} // Anonymous namespace
58 62
59VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, 63VKStreamBuffer::VKStreamBuffer(const Device& device_, VKScheduler& scheduler_)
60 VkBufferUsageFlags usage)
61 : device{device_}, scheduler{scheduler_} { 64 : device{device_}, scheduler{scheduler_} {
62 CreateBuffers(usage); 65 CreateBuffers();
63 ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); 66 ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
64 ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); 67 ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
65} 68}
66 69
67VKStreamBuffer::~VKStreamBuffer() = default; 70VKStreamBuffer::~VKStreamBuffer() = default;
68 71
69std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { 72std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
70 ASSERT(size <= stream_buffer_size); 73 ASSERT(size <= stream_buffer_size);
71 mapped_size = size; 74 mapped_size = size;
72 75
@@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
76 79
77 WaitPendingOperations(offset); 80 WaitPendingOperations(offset);
78 81
79 bool invalidated = false;
80 if (offset + size > stream_buffer_size) { 82 if (offset + size > stream_buffer_size) {
81 // The buffer would overflow, save the amount of used watches and reset the state. 83 // The buffer would overflow, save the amount of used watches and reset the state.
82 invalidation_mark = current_watch_cursor; 84 invalidation_mark = current_watch_cursor;
@@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
90 92
91 // Ensure that we don't wait for uncommitted fences. 93 // Ensure that we don't wait for uncommitted fences.
92 scheduler.Flush(); 94 scheduler.Flush();
93
94 invalidated = true;
95 } 95 }
96 96
97 return {memory.Map(offset, size), offset, invalidated}; 97 return std::make_pair(memory.Map(offset, size), offset);
98} 98}
99 99
100void VKStreamBuffer::Unmap(u64 size) { 100void VKStreamBuffer::Unmap(u64 size) {
@@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) {
113 watch.tick = scheduler.CurrentTick(); 113 watch.tick = scheduler.CurrentTick();
114} 114}
115 115
116void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { 116void VKStreamBuffer::CreateBuffers() {
117 const auto memory_properties = device.GetPhysical().GetMemoryProperties(); 117 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
118 const u32 preferred_type = GetMemoryType(memory_properties); 118 const u32 preferred_type = GetMemoryType(memory_properties);
119 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; 119 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
@@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
127 .pNext = nullptr, 127 .pNext = nullptr,
128 .flags = 0, 128 .flags = 0,
129 .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), 129 .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
130 .usage = usage, 130 .usage = BUFFER_USAGE,
131 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 131 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
132 .queueFamilyIndexCount = 0, 132 .queueFamilyIndexCount = 0,
133 .pQueueFamilyIndices = nullptr, 133 .pQueueFamilyIndices = nullptr,
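With the invalidation flag gone, Map/Unmap reduces to a simple contract: Map reserves size bytes at an aligned offset (waiting on in-flight ticks if the region is still pending), the caller writes through the returned pointer, and Unmap publishes how many bytes were actually written. A sketch of a typical upload under that contract (hypothetical helper, assuming yuzu's u64 alias):

    #include <cstring>

    u64 UploadToStream(VKStreamBuffer& stream, const void* data, u64 size, u64 alignment) {
        const auto [pointer, offset] = stream.Map(size, alignment);
        std::memcpy(pointer, data, size);
        stream.Unmap(size);
        return offset; // Bind the stream buffer at this offset
    }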
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 5e15ad78f..2e9c8cb46 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -5,31 +5,29 @@
5#pragma once 5#pragma once
6 6
7#include <optional> 7#include <optional>
8#include <tuple> 8#include <utility>
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/renderer_vulkan/wrapper.h" 12#include "video_core/vulkan_common/vulkan_wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
16class VKDevice; 16class Device;
17class VKFenceWatch; 17class VKFenceWatch;
18class VKScheduler; 18class VKScheduler;
19 19
20class VKStreamBuffer final { 20class VKStreamBuffer final {
21public: 21public:
22 explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, 22 explicit VKStreamBuffer(const Device& device, VKScheduler& scheduler);
23 VkBufferUsageFlags usage);
24 ~VKStreamBuffer(); 23 ~VKStreamBuffer();
25 24
26 /** 25 /**
27 * Reserves a region of memory from the stream buffer. 26 * Reserves a region of memory from the stream buffer.
28 * @param size Size to reserve. 27 * @param size Size to reserve.
29 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer 28 * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
30 * offset and a boolean that's true when buffer has been invalidated.
31 */ 29 */
32 std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); 30 std::pair<u8*, u64> Map(u64 size, u64 alignment);
33 31
34 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 32 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
35 void Unmap(u64 size); 33 void Unmap(u64 size);
@@ -49,14 +47,14 @@ private:
49 }; 47 };
50 48
51 /// Creates Vulkan buffer handles committing the required memory. 49 /// Creates Vulkan buffer handles committing the required memory.
52 void CreateBuffers(VkBufferUsageFlags usage); 50 void CreateBuffers();
53 51
54 /// Increases the amount of watches available. 52 /// Increases the amount of watches available.
55 void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); 53 void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
56 54
57 void WaitPendingOperations(u64 requested_upper_bound); 55 void WaitPendingOperations(u64 requested_upper_bound);
58 56
59 const VKDevice& device; ///< Vulkan device manager. 57 const Device& device; ///< Vulkan device manager.
60 VKScheduler& scheduler; ///< Command scheduler. 58 VKScheduler& scheduler; ///< Command scheduler.
61 59
62 vk::Buffer buffer; ///< Mapped buffer. 60 vk::Buffer buffer; ///< Mapped buffer.
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 9636a7c65..725a2a05d 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -11,10 +11,10 @@
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/frontend/framebuffer_layout.h" 13#include "core/frontend/framebuffer_layout.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_swapchain.h" 15#include "video_core/renderer_vulkan/vk_swapchain.h"
17#include "video_core/renderer_vulkan/wrapper.h" 16#include "video_core/vulkan_common/vulkan_device.h"
17#include "video_core/vulkan_common/vulkan_wrapper.h"
18 18
19namespace Vulkan { 19namespace Vulkan {
20 20
@@ -56,7 +56,7 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
56 56
57} // Anonymous namespace 57} // Anonymous namespace
58 58
59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_) 59VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_)
60 : surface{surface_}, device{device_}, scheduler{scheduler_} {} 60 : surface{surface_}, device{device_}, scheduler{scheduler_} {}
61 61
62VKSwapchain::~VKSwapchain() = default; 62VKSwapchain::~VKSwapchain() = default;
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 6b39befdf..2eadd62b3 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -7,7 +7,7 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
11 11
12namespace Layout { 12namespace Layout {
13struct FramebufferLayout; 13struct FramebufferLayout;
@@ -15,12 +15,12 @@ struct FramebufferLayout;
15 15
16namespace Vulkan { 16namespace Vulkan {
17 17
18class VKDevice; 18class Device;
19class VKScheduler; 19class VKScheduler;
20 20
21class VKSwapchain { 21class VKSwapchain {
22public: 22public:
23 explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler); 23 explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler);
24 ~VKSwapchain(); 24 ~VKSwapchain();
25 25
26 /// Creates (or recreates) the swapchain with a given size. 26 /// Creates (or recreates) the swapchain with a given size.
@@ -73,7 +73,7 @@ private:
73 void Destroy(); 73 void Destroy();
74 74
75 const VkSurfaceKHR surface; 75 const VkSurfaceKHR surface;
76 const VKDevice& device; 76 const Device& device;
77 VKScheduler& scheduler; 77 VKScheduler& scheduler;
78 78
79 vk::SwapchainKHR swapchain; 79 vk::SwapchainKHR swapchain;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index ae2e3322c..bd11de012 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -4,614 +4,1105 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cstddef> 7#include <span>
8#include <cstring>
9#include <memory>
10#include <variant>
11#include <vector> 8#include <vector>
12 9
13#include "common/assert.h" 10#include "video_core/engines/fermi_2d.h"
14#include "common/common_types.h" 11#include "video_core/renderer_vulkan/blit_image.h"
15#include "core/core.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/morton.h"
18#include "video_core/renderer_vulkan/maxwell_to_vk.h" 12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
19#include "video_core/renderer_vulkan/vk_device.h"
20#include "video_core/renderer_vulkan/vk_memory_manager.h" 13#include "video_core/renderer_vulkan/vk_memory_manager.h"
21#include "video_core/renderer_vulkan/vk_rasterizer.h" 14#include "video_core/renderer_vulkan/vk_rasterizer.h"
22#include "video_core/renderer_vulkan/vk_scheduler.h" 15#include "video_core/renderer_vulkan/vk_scheduler.h"
23#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 16#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
24#include "video_core/renderer_vulkan/vk_texture_cache.h" 17#include "video_core/renderer_vulkan/vk_texture_cache.h"
25#include "video_core/renderer_vulkan/wrapper.h" 18#include "video_core/vulkan_common/vulkan_device.h"
26#include "video_core/surface.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
27 20
28namespace Vulkan { 21namespace Vulkan {
29 22
30using VideoCore::MortonSwizzle; 23using Tegra::Engines::Fermi2D;
31using VideoCore::MortonSwizzleMode;
32
33using Tegra::Texture::SwizzleSource; 24using Tegra::Texture::SwizzleSource;
34using VideoCore::Surface::PixelFormat; 25using Tegra::Texture::TextureMipmapFilter;
35using VideoCore::Surface::SurfaceTarget; 26using VideoCommon::BufferImageCopy;
27using VideoCommon::ImageInfo;
28using VideoCommon::ImageType;
29using VideoCommon::SubresourceRange;
30using VideoCore::Surface::IsPixelFormatASTC;
36 31
37namespace { 32namespace {
38 33
39VkImageType SurfaceTargetToImage(SurfaceTarget target) { 34constexpr std::array ATTACHMENT_REFERENCES{
40 switch (target) { 35 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
41 case SurfaceTarget::Texture1D: 36 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
42 case SurfaceTarget::Texture1DArray: 37 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
38 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
39 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
40 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
41 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
42 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
43 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
44};
45
46constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
47 if (color == std::array<float, 4>{0, 0, 0, 0}) {
48 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
49 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
50 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
51 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
52 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
53 }
54 if (color[0] + color[1] + color[2] > 1.35f) {
55 // If color elements are brighter than roughly 0.5 average, use white border
56 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
57 } else if (color[3] > 0.5f) {
58 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
59 } else {
60 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
61 }
62}
63
64[[nodiscard]] VkImageType ConvertImageType(const ImageType type) {
65 switch (type) {
66 case ImageType::e1D:
43 return VK_IMAGE_TYPE_1D; 67 return VK_IMAGE_TYPE_1D;
44 case SurfaceTarget::Texture2D: 68 case ImageType::e2D:
45 case SurfaceTarget::Texture2DArray: 69 case ImageType::Linear:
46 case SurfaceTarget::TextureCubemap:
47 case SurfaceTarget::TextureCubeArray:
48 return VK_IMAGE_TYPE_2D; 70 return VK_IMAGE_TYPE_2D;
49 case SurfaceTarget::Texture3D: 71 case ImageType::e3D:
50 return VK_IMAGE_TYPE_3D; 72 return VK_IMAGE_TYPE_3D;
51 case SurfaceTarget::TextureBuffer: 73 case ImageType::Buffer:
52 UNREACHABLE(); 74 break;
53 return {};
54 } 75 }
55 UNREACHABLE_MSG("Unknown texture target={}", target); 76 UNREACHABLE_MSG("Invalid image type={}", type);
56 return {}; 77 return {};
57} 78}
58 79
59VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { 80[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) {
60 if (pixel_format < PixelFormat::MaxColorFormat) { 81 switch (num_samples) {
61 return VK_IMAGE_ASPECT_COLOR_BIT; 82 case 1:
62 } else if (pixel_format < PixelFormat::MaxDepthFormat) { 83 return VK_SAMPLE_COUNT_1_BIT;
63 return VK_IMAGE_ASPECT_DEPTH_BIT; 84 case 2:
64 } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { 85 return VK_SAMPLE_COUNT_2_BIT;
65 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; 86 case 4:
66 } else { 87 return VK_SAMPLE_COUNT_4_BIT;
67 UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); 88 case 8:
68 return VK_IMAGE_ASPECT_COLOR_BIT; 89 return VK_SAMPLE_COUNT_8_BIT;
90 case 16:
91 return VK_SAMPLE_COUNT_16_BIT;
92 default:
93 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
94 return VK_SAMPLE_COUNT_1_BIT;
69 } 95 }
70} 96}
71 97
72VkImageViewType GetImageViewType(SurfaceTarget target) { 98[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const Device& device, const ImageInfo& info) {
73 switch (target) { 99 const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format);
74 case SurfaceTarget::Texture1D: 100 VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
75 return VK_IMAGE_VIEW_TYPE_1D; 101 if (info.type == ImageType::e2D && info.resources.layers >= 6 &&
76 case SurfaceTarget::Texture2D: 102 info.size.width == info.size.height) {
77 return VK_IMAGE_VIEW_TYPE_2D; 103 flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
78 case SurfaceTarget::Texture3D:
79 return VK_IMAGE_VIEW_TYPE_3D;
80 case SurfaceTarget::Texture1DArray:
81 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
82 case SurfaceTarget::Texture2DArray:
83 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
84 case SurfaceTarget::TextureCubemap:
85 return VK_IMAGE_VIEW_TYPE_CUBE;
86 case SurfaceTarget::TextureCubeArray:
87 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
88 case SurfaceTarget::TextureBuffer:
89 break;
90 } 104 }
91 UNREACHABLE(); 105 if (info.type == ImageType::e3D) {
92 return {}; 106 flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
93} 107 }
94 108 VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
95vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, 109 VK_IMAGE_USAGE_SAMPLED_BIT;
96 std::size_t host_memory_size) { 110 if (format_info.attachable) {
97 // TODO(Rodrigo): Move texture buffer creation to the buffer cache 111 switch (VideoCore::Surface::GetFormatType(info.format)) {
98 return device.GetLogical().CreateBuffer({ 112 case VideoCore::Surface::SurfaceType::ColorTexture:
99 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 113 usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
114 break;
115 case VideoCore::Surface::SurfaceType::Depth:
116 case VideoCore::Surface::SurfaceType::DepthStencil:
117 usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
118 break;
119 default:
120 UNREACHABLE_MSG("Invalid surface type");
121 }
122 }
123 if (format_info.storage) {
124 usage |= VK_IMAGE_USAGE_STORAGE_BIT;
125 }
126 const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples);
127 return VkImageCreateInfo{
128 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
100 .pNext = nullptr, 129 .pNext = nullptr,
101 .flags = 0, 130 .flags = flags,
102 .size = static_cast<VkDeviceSize>(host_memory_size), 131 .imageType = ConvertImageType(info.type),
103 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | 132 .format = format_info.format,
104 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | 133 .extent =
105 VK_BUFFER_USAGE_TRANSFER_DST_BIT, 134 {
135 .width = info.size.width >> samples_x,
136 .height = info.size.height >> samples_y,
137 .depth = info.size.depth,
138 },
139 .mipLevels = static_cast<u32>(info.resources.levels),
140 .arrayLayers = static_cast<u32>(info.resources.layers),
141 .samples = ConvertSampleCount(info.num_samples),
142 .tiling = VK_IMAGE_TILING_OPTIMAL,
143 .usage = usage,
106 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 144 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
107 .queueFamilyIndexCount = 0, 145 .queueFamilyIndexCount = 0,
108 .pQueueFamilyIndices = nullptr, 146 .pQueueFamilyIndices = nullptr,
109 }); 147 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
110}
111
112VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
113 const SurfaceParams& params, VkBuffer buffer,
114 std::size_t host_memory_size) {
115 ASSERT(params.IsBuffer());
116
117 return {
118 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
119 .pNext = nullptr,
120 .flags = 0,
121 .buffer = buffer,
122 .format =
123 MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format,
124 .offset = 0,
125 .range = static_cast<VkDeviceSize>(host_memory_size),
126 }; 148 };
127} 149}
128 150
129VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { 151[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) {
130 ASSERT(!params.IsBuffer()); 152 if (info.type == ImageType::Buffer) {
131 153 return vk::Image{};
132 const auto [format, attachable, storage] = 154 }
133 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); 155 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
156}
134 157
135 VkImageCreateInfo ci{ 158[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
136 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 159 if (info.type != ImageType::Buffer) {
160 return vk::Buffer{};
161 }
162 const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
163 return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
164 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
137 .pNext = nullptr, 165 .pNext = nullptr,
138 .flags = 0, 166 .flags = 0,
139 .imageType = SurfaceTargetToImage(params.target), 167 .size = info.size.width * bytes_per_block,
140 .format = format, 168 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
141 .extent = {}, 169 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
142 .mipLevels = params.num_levels, 170 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
143 .arrayLayers = static_cast<u32>(params.GetNumLayers()),
144 .samples = VK_SAMPLE_COUNT_1_BIT,
145 .tiling = VK_IMAGE_TILING_OPTIMAL,
146 .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
147 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
148 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 171 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
149 .queueFamilyIndexCount = 0, 172 .queueFamilyIndexCount = 0,
150 .pQueueFamilyIndices = nullptr, 173 .pQueueFamilyIndices = nullptr,
151 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 174 });
152 };
153 if (attachable) {
154 ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
155 : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
156 }
157 if (storage) {
158 ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
159 }
160
161 switch (params.target) {
162 case SurfaceTarget::TextureCubemap:
163 case SurfaceTarget::TextureCubeArray:
164 ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
165 [[fallthrough]];
166 case SurfaceTarget::Texture1D:
167 case SurfaceTarget::Texture1DArray:
168 case SurfaceTarget::Texture2D:
169 case SurfaceTarget::Texture2DArray:
170 ci.extent = {params.width, params.height, 1};
171 break;
172 case SurfaceTarget::Texture3D:
173 ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
174 ci.extent = {params.width, params.height, params.depth};
175 break;
176 case SurfaceTarget::TextureBuffer:
177 UNREACHABLE();
178 }
179
180 return ci;
181} 175}
182 176
183u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, 177[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
184 SwizzleSource w_source) { 178 switch (VideoCore::Surface::GetFormatType(format)) {
185 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | 179 case VideoCore::Surface::SurfaceType::ColorTexture:
186 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); 180 return VK_IMAGE_ASPECT_COLOR_BIT;
181 case VideoCore::Surface::SurfaceType::Depth:
182 return VK_IMAGE_ASPECT_DEPTH_BIT;
183 case VideoCore::Surface::SurfaceType::DepthStencil:
184 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
185 default:
186 UNREACHABLE_MSG("Invalid surface type");
187 return VkImageAspectFlags{};
188 }
187} 189}
188 190
189} // Anonymous namespace 191[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) {
190 192 if (info.IsRenderTarget()) {
191CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, 193 return ImageAspectMask(info.format);
192 VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_,
193 GPUVAddr gpu_addr_, const SurfaceParams& params_)
194 : SurfaceBase<View>{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_},
195 memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} {
196 if (params.IsBuffer()) {
197 buffer = CreateBuffer(device, params, host_memory_size);
198 commit = memory_manager.Commit(buffer, false);
199
200 const auto buffer_view_ci =
201 GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
202 format = buffer_view_ci.format;
203
204 buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci);
205 } else {
206 const auto image_ci = GenerateImageCreateInfo(device, params);
207 format = image_ci.format;
208
209 image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
210 commit = memory_manager.Commit(image->GetHandle(), false);
211 } 194 }
212 195 const bool is_first = info.Swizzle()[0] == SwizzleSource::R;
213 // TODO(Rodrigo): Move this to a virtual function. 196 switch (info.format) {
214 u32 num_layers = 1; 197 case PixelFormat::D24_UNORM_S8_UINT:
215 if (params.is_layered || params.target == SurfaceTarget::Texture3D) { 198 case PixelFormat::D32_FLOAT_S8_UINT:
216 num_layers = params.depth; 199 return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
200 case PixelFormat::S8_UINT_D24_UNORM:
201 return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
202 case PixelFormat::D16_UNORM:
203 case PixelFormat::D32_FLOAT:
204 return VK_IMAGE_ASPECT_DEPTH_BIT;
205 default:
206 return VK_IMAGE_ASPECT_COLOR_BIT;
217 } 207 }
218 main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
219} 208}
220 209
221CachedSurface::~CachedSurface() = default; 210[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
222 211 const ImageView* image_view) {
223void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { 212 const auto pixel_format = image_view->format;
224 // To upload data we have to be outside of a renderpass 213 return VkAttachmentDescription{
225 scheduler.RequestOutsideRenderPassOperationContext(); 214 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
215 .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format,
216 .samples = image_view->Samples(),
217 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
218 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
219 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
220 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
221 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
222 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
223 };
224}
226 225
227 if (params.IsBuffer()) { 226[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
228 UploadBuffer(staging_buffer); 227 switch (swizzle) {
229 } else { 228 case SwizzleSource::Zero:
230 UploadImage(staging_buffer); 229 return VK_COMPONENT_SWIZZLE_ZERO;
230 case SwizzleSource::R:
231 return VK_COMPONENT_SWIZZLE_R;
232 case SwizzleSource::G:
233 return VK_COMPONENT_SWIZZLE_G;
234 case SwizzleSource::B:
235 return VK_COMPONENT_SWIZZLE_B;
236 case SwizzleSource::A:
237 return VK_COMPONENT_SWIZZLE_A;
238 case SwizzleSource::OneFloat:
239 case SwizzleSource::OneInt:
240 return VK_COMPONENT_SWIZZLE_ONE;
231 } 241 }
242 UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
243 return VK_COMPONENT_SWIZZLE_ZERO;
232} 244}
233 245
[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
    switch (type) {
    case VideoCommon::ImageViewType::e1D:
        return VK_IMAGE_VIEW_TYPE_1D;
    case VideoCommon::ImageViewType::e2D:
        return VK_IMAGE_VIEW_TYPE_2D;
    case VideoCommon::ImageViewType::Cube:
        return VK_IMAGE_VIEW_TYPE_CUBE;
    case VideoCommon::ImageViewType::e3D:
        return VK_IMAGE_VIEW_TYPE_3D;
    case VideoCommon::ImageViewType::e1DArray:
        return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
    case VideoCommon::ImageViewType::e2DArray:
        return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
    case VideoCommon::ImageViewType::CubeArray:
        return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
    case VideoCommon::ImageViewType::Rect:
        LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported");
        return VK_IMAGE_VIEW_TYPE_2D;
    case VideoCommon::ImageViewType::Buffer:
        UNREACHABLE_MSG("Texture buffers can't be image views");
        return VK_IMAGE_VIEW_TYPE_1D;
    }
    UNREACHABLE_MSG("Invalid image view type={}", type);
    return VK_IMAGE_VIEW_TYPE_2D;
}

[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers(
    VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) {
    return VkImageSubresourceLayers{
        .aspectMask = aspect_mask,
        .mipLevel = static_cast<u32>(subresource.base_level),
        .baseArrayLayer = static_cast<u32>(subresource.base_layer),
        .layerCount = static_cast<u32>(subresource.num_layers),
    };
}

[[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) {
    return VkOffset3D{
        .x = offset3d.x,
        .y = offset3d.y,
        .z = offset3d.z,
    };
}

[[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) {
    return VkExtent3D{
        .width = static_cast<u32>(extent3d.width),
        .height = static_cast<u32>(extent3d.height),
        .depth = static_cast<u32>(extent3d.depth),
    };
}

[[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy,
                                        VkImageAspectFlags aspect_mask) noexcept {
    return VkImageCopy{
        .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask),
        .srcOffset = MakeOffset3D(copy.src_offset),
        .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask),
        .dstOffset = MakeOffset3D(copy.dst_offset),
        .extent = MakeExtent3D(copy.extent),
    };
}

[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
    std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
    std::vector<VkBufferCopy> result(copies.size());
    std::ranges::transform(
        copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
            return VkBufferCopy{
                .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset),
                .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset),
                .size = static_cast<VkDeviceSize>(copy.size),
            };
        });
    return result;
}

[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
    std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
    struct Maker {
        VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
            return VkBufferImageCopy{
                .bufferOffset = copy.buffer_offset + buffer_offset,
                .bufferRowLength = copy.buffer_row_length,
                .bufferImageHeight = copy.buffer_image_height,
                .imageSubresource =
                    {
                        .aspectMask = aspect_mask,
                        .mipLevel = static_cast<u32>(copy.image_subresource.base_level),
                        .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer),
                        .layerCount = static_cast<u32>(copy.image_subresource.num_layers),
                    },
                .imageOffset =
                    {
                        .x = copy.image_offset.x,
                        .y = copy.image_offset.y,
                        .z = copy.image_offset.z,
                    },
                .imageExtent =
                    {
                        .width = copy.image_extent.width,
                        .height = copy.image_extent.height,
                        .depth = copy.image_extent.depth,
                    },
            };
        }
        size_t buffer_offset;
        VkImageAspectFlags aspect_mask;
    };
    if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
        std::vector<VkBufferImageCopy> result(copies.size() * 2);
        std::ranges::transform(copies, result.begin(),
                               Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
        std::ranges::transform(copies, result.begin() + copies.size(),
                               Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
        return result;
    } else {
        std::vector<VkBufferImageCopy> result(copies.size());
        std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
        return result;
    }
}
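// Usage sketch (illustrative, not taken from this file): for a combined
// depth-stencil aspect mask the transform above doubles the copy list, so a
// caller can rely on the aspect split being ordered:
//
//     const auto vk_copies = TransformBufferImageCopies(
//         copies, /*buffer_offset=*/0,
//         VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
//     // vk_copies.size() == copies.size() * 2: entries [0, copies.size())
//     // address the depth aspect, the remainder address the stencil aspect.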

[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask,
                                                           const SubresourceRange& range) {
    return VkImageSubresourceRange{
        .aspectMask = aspect_mask,
        .baseMipLevel = static_cast<u32>(range.base.level),
        .levelCount = static_cast<u32>(range.extent.levels),
        .baseArrayLayer = static_cast<u32>(range.base.layer),
        .layerCount = static_cast<u32>(range.extent.layers),
    };
}

[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) {
    SubresourceRange range = image_view->range;
    if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
        // Slice image views always affect a single layer, but their subresource range corresponds
        // to the slice. Override the value to affect a single layer.
        range.base.layer = 0;
        range.extent.layers = 1;
    }
    return MakeSubresourceRange(ImageAspectMask(image_view->format), range);
}

[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) {
    return VkImageSubresourceLayers{
        .aspectMask = ImageAspectMask(image_view->format),
        .mipLevel = static_cast<u32>(image_view->range.base.level),
        .baseArrayLayer = static_cast<u32>(image_view->range.base.layer),
        .layerCount = static_cast<u32>(image_view->range.extent.layers),
    };
}
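// Illustrative example (hypothetical values): a slice view of a 3D image that
// selects depth slice 5 stores {base.layer = 5, extent.layers = 1}, but the
// Vulkan subresource range of the underlying 3D image only has layer 0:
//
//     // image_view->range:                baseArrayLayer = 5, layerCount = 1
//     // MakeSubresourceRange(image_view): baseArrayLayer = 0, layerCount = 1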

[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
    switch (value) {
    case SwizzleSource::G:
        return SwizzleSource::R;
    default:
        return value;
    }
}
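// Worked example (illustrative): Vulkan returns sampled depth and stencil in
// the R component, while guest swizzles may ask for G. Normalizing a swizzle
// of {G, G, G, A} with ConvertGreenRed yields {R, R, R, A}, so the view reads
// the only component the depth aspect actually has:
//
//     std::array<SwizzleSource, 4> s{SwizzleSource::G, SwizzleSource::G,
//                                    SwizzleSource::G, SwizzleSource::A};
//     std::ranges::transform(s, s.begin(), ConvertGreenRed);
//     // s == {R, R, R, A}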

void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
                       VkImageAspectFlags aspect_mask, bool is_initialized,
                       std::span<const VkBufferImageCopy> copies) {
    static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
                                                  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
    const VkImageMemoryBarrier read_barrier{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = ACCESS_FLAGS,
        .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
        .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange =
            {
                .aspectMask = aspect_mask,
                .baseMipLevel = 0,
                .levelCount = VK_REMAINING_MIP_LEVELS,
                .baseArrayLayer = 0,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
    };
    const VkImageMemoryBarrier write_barrier{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = ACCESS_FLAGS,
        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange =
            {
                .aspectMask = aspect_mask,
                .baseMipLevel = 0,
                .levelCount = VK_REMAINING_MIP_LEVELS,
                .baseArrayLayer = 0,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
    };
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                           read_barrier);
    cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
    // TODO: Move this to another API
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
                           write_barrier);
}
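// Usage sketch (illustrative; Image::UploadMemory below records this helper
// in the same way, with `staging`, `image` and `copies` standing in for the
// real captures):
//
//     scheduler.Record([=](vk::CommandBuffer cmdbuf) {
//         CopyBufferToImage(cmdbuf, staging, image, aspect_mask,
//                           /*is_initialized=*/false, copies);
//     });
//     // is_initialized == false makes the first barrier use
//     // VK_IMAGE_LAYOUT_UNDEFINED, letting the driver discard the previous
//     // contents instead of preserving them.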

[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region,
                                        const std::array<Offset2D, 2>& src_region,
                                        const VkImageSubresourceLayers& dst_layers,
                                        const VkImageSubresourceLayers& src_layers) {
    return VkImageBlit{
        .srcSubresource = src_layers,
        .srcOffsets =
            {
                {
                    .x = src_region[0].x,
                    .y = src_region[0].y,
                    .z = 0,
                },
                {
                    .x = src_region[1].x,
                    .y = src_region[1].y,
                    .z = 1,
                },
            },
        .dstSubresource = dst_layers,
        .dstOffsets =
            {
                {
                    .x = dst_region[0].x,
                    .y = dst_region[0].y,
                    .z = 0,
                },
                {
                    .x = dst_region[1].x,
                    .y = dst_region[1].y,
                    .z = 1,
                },
            },
    };
}

[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region,
                                              const std::array<Offset2D, 2>& src_region,
                                              const VkImageSubresourceLayers& dst_layers,
                                              const VkImageSubresourceLayers& src_layers) {
    return VkImageResolve{
        .srcSubresource = src_layers,
        .srcOffset =
            {
                .x = src_region[0].x,
                .y = src_region[0].y,
                .z = 0,
            },
        .dstSubresource = dst_layers,
        .dstOffset =
            {
                .x = dst_region[0].x,
                .y = dst_region[0].y,
                .z = 0,
            },
        .extent =
            {
                .width = static_cast<u32>(dst_region[1].x - dst_region[0].x),
                .height = static_cast<u32>(dst_region[1].y - dst_region[0].y),
                .depth = 1,
            },
    };
}

struct RangedBarrierRange {
    u32 min_mip = std::numeric_limits<u32>::max();
    u32 max_mip = std::numeric_limits<u32>::min();
    u32 min_layer = std::numeric_limits<u32>::max();
    u32 max_layer = std::numeric_limits<u32>::min();

    void AddLayers(const VkImageSubresourceLayers& layers) {
        min_mip = std::min(min_mip, layers.mipLevel);
        max_mip = std::max(max_mip, layers.mipLevel + 1);
        min_layer = std::min(min_layer, layers.baseArrayLayer);
        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
    }

    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
        return VkImageSubresourceRange{
            .aspectMask = aspect_mask,
            .baseMipLevel = min_mip,
            .levelCount = max_mip - min_mip,
            .baseArrayLayer = min_layer,
            .layerCount = max_layer - min_layer,
        };
    }
};
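// Usage sketch (illustrative, mirroring CopyImage below): accumulate the
// subresources touched by a batch of copies so the barrier covers the
// smallest possible range instead of the whole image:
//
//     RangedBarrierRange range;
//     for (const VkImageCopy& copy : vk_copies) {
//         range.AddLayers(copy.dstSubresource);
//     }
//     const VkImageSubresourceRange sub = range.SubresourceRange(aspect_mask);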

} // Anonymous namespace

void TextureCacheRuntime::Finish() {
    scheduler.Finish();
}

ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
    const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true);
    return ImageBufferMap{
        .handle = *buffer.handle,
        .map = buffer.commit->Map(size),
    };
}

void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                                    const std::array<Offset2D, 2>& dst_region,
                                    const std::array<Offset2D, 2>& src_region,
                                    Tegra::Engines::Fermi2D::Filter filter,
                                    Tegra::Engines::Fermi2D::Operation operation) {
    const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);
    const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT;
    const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT;
    ASSERT(aspect_mask == ImageAspectMask(dst.format));
    if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
        blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
                                    operation);
        return;
    }
    if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
        if (!device.IsBlitDepthStencilSupported()) {
            UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa);
            blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(),
                                               dst_region, src_region, filter, operation);
            return;
        }
    }
    ASSERT(src.ImageFormat() == dst.ImageFormat());
    ASSERT(!(is_dst_msaa && !is_src_msaa));
    ASSERT(operation == Fermi2D::Operation::SrcCopy);

    const VkImage dst_image = dst.ImageHandle();
    const VkImage src_image = src.ImageHandle();
    const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst);
    const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src);
    const bool is_resolve = is_src_msaa && !is_dst_msaa;
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers,
                      aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) {
        const std::array read_barriers{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange{
                    .aspectMask = aspect_mask,
                    .baseMipLevel = 0,
                    .levelCount = VK_REMAINING_MIP_LEVELS,
                    .baseArrayLayer = 0,
                    .layerCount = VK_REMAINING_ARRAY_LAYERS,
                },
            },
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange{
                    .aspectMask = aspect_mask,
                    .baseMipLevel = 0,
                    .levelCount = VK_REMAINING_MIP_LEVELS,
                    .baseArrayLayer = 0,
                    .layerCount = VK_REMAINING_ARRAY_LAYERS,
                },
            },
        };
        VkImageMemoryBarrier write_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = dst_image,
            .subresourceRange{
                .aspectMask = aspect_mask,
                .baseMipLevel = 0,
                .levelCount = VK_REMAINING_MIP_LEVELS,
                .baseArrayLayer = 0,
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, nullptr, nullptr, read_barriers);
        if (is_resolve) {
            cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
                                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                MakeImageResolve(dst_region, src_region, dst_layers, src_layers));
        } else {
            const bool is_linear = filter == Fermi2D::Filter::Bilinear;
            const VkFilter vk_filter = is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
            cmdbuf.BlitImage(
                src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter);
        }
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, write_barrier);
    });
}

void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
    switch (dst_view.format) {
    case PixelFormat::R16_UNORM:
        if (src_view.format == PixelFormat::D16_UNORM) {
            return blit_image_helper.ConvertD16ToR16(dst, src_view);
        }
        break;
    case PixelFormat::R32_FLOAT:
        if (src_view.format == PixelFormat::D32_FLOAT) {
            return blit_image_helper.ConvertD32ToR32(dst, src_view);
        }
        break;
    case PixelFormat::D16_UNORM:
        if (src_view.format == PixelFormat::R16_UNORM) {
            return blit_image_helper.ConvertR16ToD16(dst, src_view);
        }
        break;
    case PixelFormat::D32_FLOAT:
        if (src_view.format == PixelFormat::R32_FLOAT) {
            return blit_image_helper.ConvertR32ToD32(dst, src_view);
        }
        break;
    default:
        break;
    }
    UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format);
}

void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
                                    std::span<const VideoCommon::ImageCopy> copies) {
    std::vector<VkImageCopy> vk_copies(copies.size());
    const VkImageAspectFlags aspect_mask = dst.AspectMask();
    ASSERT(aspect_mask == src.AspectMask());

    std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) {
        return MakeImageCopy(copy, aspect_mask);
    });
    const VkImage dst_image = dst.Handle();
    const VkImage src_image = src.Handle();
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
        RangedBarrierRange dst_range;
        RangedBarrierRange src_range;
        for (const VkImageCopy& copy : vk_copies) {
            dst_range.AddLayers(copy.dstSubresource);
            src_range.AddLayers(copy.srcSubresource);
        }
        const std::array read_barriers{
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = src_image,
                .subresourceRange = src_range.SubresourceRange(aspect_mask),
            },
            VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                                 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                                 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = dst_image,
                .subresourceRange = dst_range.SubresourceRange(aspect_mask),
            },
        };
        const VkImageMemoryBarrier write_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = dst_image,
            .subresourceRange = dst_range.SubresourceRange(aspect_mask),
        };
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                               0, {}, {}, read_barriers);
        cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               0, write_barrier);
    });
}

Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
             VAddr cpu_addr_)
    : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
      image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
      aspect_mask(ImageAspectMask(info.format)) {
    if (image) {
        commit = runtime.memory_manager.Commit(image, false);
    } else {
        commit = runtime.memory_manager.Commit(buffer, false);
    }
    if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
        flags |= VideoCommon::ImageFlagBits::Converted;
    }
    if (runtime.device.HasDebuggingToolAttached()) {
        if (image) {
            image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
        } else {
            buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
        }
    }
}

void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                         std::span<const BufferImageCopy> copies) {
    // TODO: Move this to another API
    scheduler->RequestOutsideRenderPassOperationContext();
    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
    const VkBuffer src_buffer = map.handle;
    const VkImage vk_image = *image;
    const VkImageAspectFlags vk_aspect_mask = aspect_mask;
    const bool is_initialized = std::exchange(initialized, true);
    scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
                       vk_copies](vk::CommandBuffer cmdbuf) {
        CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
    });
}

void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                         std::span<const VideoCommon::BufferCopy> copies) {
    // TODO: Move this to another API
    scheduler->RequestOutsideRenderPassOperationContext();
    std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
    const VkBuffer src_buffer = map.handle;
    const VkBuffer dst_buffer = *buffer;
    scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
        // TODO: Barriers
        cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
    });
}

void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
                           std::span<const BufferImageCopy> copies) {
    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
    scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
                       vk_copies](vk::CommandBuffer cmdbuf) {
        // TODO: Barriers
        cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
    });
}

ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
                     ImageId image_id_, Image& image)
    : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
      image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount(
                                                                         image.info.num_samples)} {
    const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
    std::array<SwizzleSource, 4> swizzle{
        SwizzleSource::R,
        SwizzleSource::G,
        SwizzleSource::B,
        SwizzleSource::A,
    };
    if (!info.IsRenderTarget()) {
        swizzle = info.Swizzle();
        if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
            std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
        }
    }
    const VkFormat vk_format =
        MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format;
    const VkImageViewCreateInfo create_info{
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .image = image.Handle(),
        .viewType = VkImageViewType{},
        .format = vk_format,
        .components{
            .r = ComponentSwizzle(swizzle[0]),
            .g = ComponentSwizzle(swizzle[1]),
            .b = ComponentSwizzle(swizzle[2]),
            .a = ComponentSwizzle(swizzle[3]),
        },
        .subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
    };
    const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) {
        VkImageViewCreateInfo ci{create_info};
        ci.viewType = ImageViewType(view_type);
        if (num_layers) {
            ci.subresourceRange.layerCount = *num_layers;
        }
        vk::ImageView handle = device->GetLogical().CreateImageView(ci);
        if (device->HasDebuggingToolAttached()) {
            handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str());
        }
        image_views[static_cast<size_t>(view_type)] = std::move(handle);
    };
    switch (info.type) {
    case VideoCommon::ImageViewType::e1D:
    case VideoCommon::ImageViewType::e1DArray:
        create(VideoCommon::ImageViewType::e1D, 1);
        create(VideoCommon::ImageViewType::e1DArray, std::nullopt);
        render_target = Handle(VideoCommon::ImageViewType::e1DArray);
        break;
    case VideoCommon::ImageViewType::e2D:
    case VideoCommon::ImageViewType::e2DArray:
        create(VideoCommon::ImageViewType::e2D, 1);
        create(VideoCommon::ImageViewType::e2DArray, std::nullopt);
        render_target = Handle(VideoCommon::ImageViewType::e2DArray);
        break;
    case VideoCommon::ImageViewType::e3D:
        create(VideoCommon::ImageViewType::e3D, std::nullopt);
        render_target = Handle(VideoCommon::ImageViewType::e3D);
        break;
    case VideoCommon::ImageViewType::Cube:
    case VideoCommon::ImageViewType::CubeArray:
        create(VideoCommon::ImageViewType::Cube, 6);
        create(VideoCommon::ImageViewType::CubeArray, std::nullopt);
        break;
    case VideoCommon::ImageViewType::Rect:
        UNIMPLEMENTED();
        break;
    case VideoCommon::ImageViewType::Buffer:
        buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{
            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .buffer = image.Buffer(),
            .format = vk_format,
            .offset = 0, // TODO: Redesign buffer cache to support this
            .range = image.guest_size_bytes,
        });
        break;
    }
}

ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
    : VideoCommon::ImageViewBase{params} {}

VkImageView ImageView::DepthView() {
    if (depth_view) {
        return *depth_view;
    }
    depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT);
    return *depth_view;
}

VkImageView ImageView::StencilView() {
    if (stencil_view) {
        return *stencil_view;
    }
    stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT);
    return *stencil_view;
}

vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) {
    return device->GetLogical().CreateImageView({
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .image = image_handle,
        .viewType = ImageViewType(type),
        .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format,
        .components{
            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
        },
        .subresourceRange = MakeSubresourceRange(aspect_mask, range),
    });
}

Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) {
    const auto& device = runtime.device;
    const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported();
    const std::array<float, 4> color = tsc.BorderColor();
    // C++20 bit_cast
    VkClearColorValue border_color;
    std::memcpy(&border_color, &color, sizeof(color));
    const VkSamplerCustomBorderColorCreateInfoEXT border_ci{
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
        .pNext = nullptr,
        .customBorderColor = border_color,
        .format = VK_FORMAT_UNDEFINED,
    };
    const void* pnext = nullptr;
    if (arbitrary_borders) {
        pnext = &border_ci;
    }
    const VkSamplerReductionModeCreateInfoEXT reduction_ci{
        .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT,
        .pNext = pnext,
        .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter),
    };
    if (runtime.device.IsExtSamplerFilterMinmaxSupported()) {
        pnext = &reduction_ci;
    } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) {
        LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required");
    }
    // Some games have samplers with garbage. Sanitize them here.
    const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
    sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .pNext = pnext,
        .flags = 0,
        .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
        .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
        .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
        .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
        .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
        .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
        .mipLodBias = tsc.LodBias(),
        .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
        .maxAnisotropy = max_anisotropy,
        .compareEnable = tsc.depth_compare_enabled,
        .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
        .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
        .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
        .borderColor =
            arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
        .unnormalizedCoordinates = VK_FALSE,
    });
}
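// Illustrative example of the sanitization above (hypothetical TSC values): a
// guest sampler with MaxAnisotropy() == 0.0f and mipmap_filter == None maps to
// a valid Vulkan sampler with:
//
//     maxAnisotropy    = 1.0f;     // clamped into [1.0, 16.0]
//     anisotropyEnable = VK_FALSE; // only enabled when max_anisotropy > 1.0
//     minLod = 0.0f; maxLod = 0.25f; // spec-suggested clamp that restricts
//                                    // sampling to the base mip level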

Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
    std::vector<VkAttachmentDescription> descriptions;
    std::vector<VkImageView> attachments;
    RenderPassKey renderpass_key{};
    s32 num_layers = 1;

    for (size_t index = 0; index < NUM_RT; ++index) {
        const ImageView* const color_buffer = color_buffers[index];
        if (!color_buffer) {
            renderpass_key.color_formats[index] = PixelFormat::Invalid;
            continue;
        }
        descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
        attachments.push_back(color_buffer->RenderTarget());
        renderpass_key.color_formats[index] = color_buffer->format;
        num_layers = std::max(num_layers, color_buffer->range.extent.layers);
        images[num_images] = color_buffer->ImageHandle();
        image_ranges[num_images] = MakeSubresourceRange(color_buffer);
        samples = color_buffer->Samples();
        ++num_images;
    }
    const size_t num_colors = attachments.size();
    const VkAttachmentReference* depth_attachment =
        depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
    if (depth_buffer) {
        descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
        attachments.push_back(depth_buffer->RenderTarget());
        renderpass_key.depth_format = depth_buffer->format;
        num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
        images[num_images] = depth_buffer->ImageHandle();
        image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
        samples = depth_buffer->Samples();
        ++num_images;
    } else {
        renderpass_key.depth_format = PixelFormat::Invalid;
    }
    renderpass_key.samples = samples;

    const auto& device = runtime.device.GetLogical();
    const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
    if (is_new) {
        const VkSubpassDescription subpass{
            .flags = 0,
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .inputAttachmentCount = 0,
            .pInputAttachments = nullptr,
            .colorAttachmentCount = static_cast<u32>(num_colors),
            .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
            .pResolveAttachments = nullptr,
            .pDepthStencilAttachment = depth_attachment,
            .preserveAttachmentCount = 0,
            .pPreserveAttachments = nullptr,
        };
        cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .attachmentCount = static_cast<u32>(descriptions.size()),
            .pAttachments = descriptions.data(),
            .subpassCount = 1,
            .pSubpasses = &subpass,
            .dependencyCount = 0,
            .pDependencies = nullptr,
        });
    }
    renderpass = *cache_pair->second;
    render_area = VkExtent2D{
        .width = key.size.width,
        .height = key.size.height,
    };
    num_color_buffers = static_cast<u32>(num_colors);
    framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .renderPass = renderpass,
        .attachmentCount = static_cast<u32>(attachments.size()),
        .pAttachments = attachments.data(),
        .width = key.size.width,
        .height = key.size.height,
        .layers = static_cast<u32>(num_layers),
    });
    if (runtime.device.HasDebuggingToolAttached()) {
        framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
    }
}
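// Design note with a sketch (illustrative): RenderPassKey only captures
// attachment formats and sample counts, so compatible framebuffers reuse one
// VkRenderPass through the try_emplace call above:
//
//     // 1st framebuffer {A8B8G8R8_UNORM, depth D32_FLOAT, 1 sample}
//     //   -> cache miss, CreateRenderPass
//     // 2nd framebuffer with the same formats and samples
//     //   -> cache hit, the render pass handle is shared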

} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index b0be4cb0f..92a7aad8b 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -4,217 +4,270 @@

#pragma once

#include <compare>
#include <span>

#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

namespace Vulkan {

using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Offset2D;
using VideoCommon::RenderTargets;
using VideoCore::Surface::PixelFormat;

class VKScheduler;
class VKStagingBufferPool;

class BlitImageHelper;
class Device;
class Image;
class ImageView;
class Framebuffer;

struct RenderPassKey {
    constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;

    std::array<PixelFormat, NUM_RT> color_formats;
    PixelFormat depth_format;
    VkSampleCountFlagBits samples;
};

} // namespace Vulkan

namespace std {
template <>
struct hash<Vulkan::RenderPassKey> {
    [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
        size_t value = static_cast<size_t>(key.depth_format) << 48;
        value ^= static_cast<size_t>(key.samples) << 52;
        for (size_t i = 0; i < key.color_formats.size(); ++i) {
            value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
        }
        return value;
    }
};
} // namespace std
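// Worked example (illustrative): the hash packs each color format into a
// 6-bit slot and folds the depth format and sample count into the upper bits.
// With hypothetical values depth_format = 20, samples = 1 and one color
// format 9 in slot 0:
//
//     value = (size_t(20) << 48) ^ (size_t(1) << 52) ^ (size_t(9) << 0);
//
// Formats above 63 can spill into neighboring slots, which only costs extra
// collisions; equality is still decided by RenderPassKey::operator<=>.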

namespace Vulkan {

struct ImageBufferMap {
    [[nodiscard]] VkBuffer Handle() const noexcept {
        return handle;
    }

    [[nodiscard]] std::span<u8> Span() const noexcept {
        return map.Span();
    }

    VkBuffer handle;
    MemoryMap map;
};

struct TextureCacheRuntime {
    const Device& device;
    VKScheduler& scheduler;
    VKMemoryManager& memory_manager;
    VKStagingBufferPool& staging_buffer_pool;
    BlitImageHelper& blit_image_helper;
    std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache;

    void Finish();

    [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);

    [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) {
        // TODO: Have a special function for this
        return MapUploadBuffer(size);
    }

    void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                   const std::array<Offset2D, 2>& dst_region,
                   const std::array<Offset2D, 2>& src_region,
                   Tegra::Engines::Fermi2D::Filter filter,
                   Tegra::Engines::Fermi2D::Operation operation);

    void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);

    void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);

    [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
        return false;
    }

    void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
                               std::span<const VideoCommon::SwizzleParameters>) {
        UNREACHABLE();
    }

    void InsertUploadMemoryBarrier() {}

    bool HasBrokenTextureViewFormats() const noexcept {
        // No known Vulkan driver has broken image views
        return false;
    }
};

class Image : public VideoCommon::ImageBase {
public:
    explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                   VAddr cpu_addr);

    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                      std::span<const VideoCommon::BufferImageCopy> copies);

    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
                      std::span<const VideoCommon::BufferCopy> copies);

    void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
                        std::span<const VideoCommon::BufferImageCopy> copies);

    [[nodiscard]] VkImage Handle() const noexcept {
        return *image;
    }

    [[nodiscard]] VkBuffer Buffer() const noexcept {
        return *buffer;
    }

    [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
        return aspect_mask;
    }

private:
    VKScheduler* scheduler;
    vk::Image image;
    vk::Buffer buffer;
    VKMemoryCommit commit;
    VkImageAspectFlags aspect_mask = 0;
    bool initialized = false;
};

class ImageView : public VideoCommon::ImageViewBase {
public:
    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
    explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);

    [[nodiscard]] VkImageView DepthView();

    [[nodiscard]] VkImageView StencilView();

    [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
        return *image_views[static_cast<size_t>(query_type)];
    }

    [[nodiscard]] VkBufferView BufferView() const noexcept {
        return *buffer_view;
    }

    [[nodiscard]] VkImage ImageHandle() const noexcept {
        return image_handle;
    }

    [[nodiscard]] VkImageView RenderTarget() const noexcept {
        return render_target;
    }

    [[nodiscard]] PixelFormat ImageFormat() const noexcept {
        return image_format;
    }

    [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
        return samples;
    }

private:
    [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);

    const Device* device = nullptr;
    std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
    vk::ImageView depth_view;
    vk::ImageView stencil_view;
    vk::BufferView buffer_view;
    VkImage image_handle = VK_NULL_HANDLE;
    VkImageView render_target = VK_NULL_HANDLE;
    PixelFormat image_format = PixelFormat::Invalid;
    VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
};

class ImageAlloc : public VideoCommon::ImageAllocBase {};

class Sampler {
public:
    explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);

    [[nodiscard]] VkSampler Handle() const noexcept {
        return *sampler;
    }

private:
    vk::Sampler sampler;
};

class Framebuffer {
public:
    explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
                         ImageView* depth_buffer, const VideoCommon::RenderTargets& key);

    [[nodiscard]] VkFramebuffer Handle() const noexcept {
        return *framebuffer;
    }

    [[nodiscard]] VkRenderPass RenderPass() const noexcept {
        return renderpass;
    }

    [[nodiscard]] VkExtent2D RenderArea() const noexcept {
        return render_area;
    }

    [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
        return samples;
    }

    [[nodiscard]] u32 NumColorBuffers() const noexcept {
        return num_color_buffers;
    }

    [[nodiscard]] u32 NumImages() const noexcept {
        return num_images;
    }

    [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
        return images;
241 }
242
243 [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
244 return image_ranges;
245 }
246
247private:
248 vk::Framebuffer framebuffer;
249 VkRenderPass renderpass{};
250 VkExtent2D render_area{};
251 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
252 u32 num_color_buffers = 0;
253 u32 num_images = 0;
254 std::array<VkImage, 9> images{};
255 std::array<VkImageSubresourceRange, 9> image_ranges{};
218}; 256};
219 257
258struct TextureCacheParams {
259 static constexpr bool ENABLE_VALIDATION = true;
260 static constexpr bool FRAMEBUFFER_BLITS = false;
261 static constexpr bool HAS_EMULATED_COPIES = false;
262
263 using Runtime = Vulkan::TextureCacheRuntime;
264 using Image = Vulkan::Image;
265 using ImageAlloc = Vulkan::ImageAlloc;
266 using ImageView = Vulkan::ImageView;
267 using Sampler = Vulkan::Sampler;
268 using Framebuffer = Vulkan::Framebuffer;
269};
270
271using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
272
220} // namespace Vulkan 273} // namespace Vulkan
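The new TextureCacheParams traits struct above is the hook that lets the backend-agnostic VideoCommon::TextureCache template be instantiated with Vulkan types: the shared code pulls Runtime, Image, ImageView, Sampler and Framebuffer out of the params type and branches on the compile-time flags. A minimal sketch of that pattern, with DemoRuntime and DemoImage as hypothetical stand-ins for a real backend's types:

#include <vector>

struct DemoRuntime {};
struct DemoImage {
    int handle = 0;
};

struct DemoParams {
    static constexpr bool ENABLE_VALIDATION = true;
    using Runtime = DemoRuntime;
    using Image = DemoImage;
};

// The shared cache is a template over a traits struct, so each backend
// instantiates it with its own types and compile-time feature flags.
template <class P>
class TextureCache {
public:
    explicit TextureCache(typename P::Runtime& runtime_) : runtime{runtime_} {}

    typename P::Image& CreateImage() {
        if constexpr (P::ENABLE_VALIDATION) {
            // Validation-only bookkeeping would go here; it compiles away
            // entirely when the backend sets the flag to false.
        }
        return images.emplace_back();
    }

private:
    typename P::Runtime& runtime;
    std::vector<typename P::Image> images;
};

// Usage mirroring `using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;`
int main() {
    DemoRuntime runtime;
    TextureCache<DemoParams> cache{runtime};
    cache.CreateImage();
}

Because the flags are static constexpr, disabled paths cost nothing at runtime instead of being checked per call.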
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 8826da325..f99273c6a 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -7,14 +7,14 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_)
+VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
     : device{device_}, scheduler{scheduler_} {}
 
 VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index f7e3c9821..e214f7195 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -8,11 +8,11 @@
 #include <boost/container/static_vector.hpp>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
-class VKDevice;
+class Device;
 class VKScheduler;
 
 struct DescriptorUpdateEntry {
@@ -31,7 +31,7 @@ struct DescriptorUpdateEntry {
 
 class VKUpdateDescriptorQueue final {
 public:
-    explicit VKUpdateDescriptorQueue(const VKDevice& device_, VKScheduler& scheduler_);
+    explicit VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_);
     ~VKUpdateDescriptorQueue();
 
     void TickFrame();
@@ -40,32 +40,36 @@ public:
 
     void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
 
-    void AddSampledImage(VkSampler sampler, VkImageView image_view) {
-        payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
+    void AddSampledImage(VkImageView image_view, VkSampler sampler) {
+        payload.emplace_back(VkDescriptorImageInfo{
+            .sampler = sampler,
+            .imageView = image_view,
+            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+        });
     }
 
     void AddImage(VkImageView image_view) {
-        payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
+        payload.emplace_back(VkDescriptorImageInfo{
+            .sampler = VK_NULL_HANDLE,
+            .imageView = image_view,
+            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+        });
     }
 
-    void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
-        payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
+    void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
+        payload.emplace_back(VkDescriptorBufferInfo{
+            .buffer = buffer,
+            .offset = offset,
+            .range = size,
+        });
     }
 
     void AddTexelBuffer(VkBufferView texel_buffer) {
         payload.emplace_back(texel_buffer);
     }
 
-    VkImageLayout* LastImageLayout() {
-        return &payload.back().image.imageLayout;
-    }
-
-    const VkImageLayout* LastImageLayout() const {
-        return &payload.back().image.imageLayout;
-    }
-
 private:
-    const VKDevice& device;
+    const Device& device;
     VKScheduler& scheduler;
 
     const DescriptorUpdateEntry* upload_start = nullptr;
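The AddSampledImage/AddImage/AddBuffer rewrites above swap positional brace initialization for C++20 designated initializers, which is what removes the need for the deleted LastImageLayout() fixup: the layout is now named and set at construction time. A self-contained sketch of the same idiom, with ImageInfo and LAYOUT_GENERAL as stand-ins for VkDescriptorImageInfo and VK_IMAGE_LAYOUT_GENERAL:

#include <cstdint>
#include <vector>

// Stand-ins for VkDescriptorImageInfo and VK_IMAGE_LAYOUT_GENERAL; the
// real definitions come from the Vulkan headers.
struct ImageInfo {
    std::uint64_t sampler;
    std::uint64_t imageView;
    int imageLayout;
};
constexpr int LAYOUT_GENERAL = 1;

// Designated initializers name every member at the call site, so the
// reordered (view, sampler) parameter list cannot silently swap fields
// the way positional {a, b, c} braces could, and the layout no longer
// needs a separate fixup after the entry is pushed.
void AddSampledImage(std::vector<ImageInfo>& payload, std::uint64_t image_view,
                     std::uint64_t sampler) {
    payload.push_back(ImageInfo{
        .sampler = sampler,
        .imageView = image_view,
        .imageLayout = LAYOUT_GENERAL,
    });
}

int main() {
    std::vector<ImageInfo> payload;
    AddSampledImage(payload, 2, 1);
}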
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp
deleted file mode 100644
index 53c7ef12d..000000000
--- a/src/video_core/sampler_cache.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "common/cityhash.h"
-#include "common/common_types.h"
-#include "video_core/sampler_cache.h"
-
-namespace VideoCommon {
-
-std::size_t SamplerCacheKey::Hash() const {
-    static_assert(sizeof(raw) % sizeof(u64) == 0);
-    return static_cast<std::size_t>(
-        Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
-}
-
-bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
-    return raw == rhs.raw;
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h
deleted file mode 100644
index cbe3ad071..000000000
--- a/src/video_core/sampler_cache.h
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <unordered_map>
-
-#include "video_core/textures/texture.h"
-
-namespace VideoCommon {
-
-struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
-    std::size_t Hash() const;
-
-    bool operator==(const SamplerCacheKey& rhs) const;
-
-    bool operator!=(const SamplerCacheKey& rhs) const {
-        return !operator==(rhs);
-    }
-};
-
-} // namespace VideoCommon
-
-namespace std {
-
-template <>
-struct hash<VideoCommon::SamplerCacheKey> {
-    std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept {
-        return k.Hash();
-    }
-};
-
-} // namespace std
-
-namespace VideoCommon {
-
-template <typename SamplerType, typename SamplerStorageType>
-class SamplerCache {
-public:
-    SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) {
-        const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
-        auto& sampler = entry->second;
-        if (is_cache_miss) {
-            sampler = CreateSampler(tsc);
-        }
-        return ToSamplerType(sampler);
-    }
-
-protected:
-    virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0;
-
-    virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0;
-
-private:
-    std::unordered_map<SamplerCacheKey, SamplerStorageType> cache;
-};
-
-} // namespace VideoCommon
\ No newline at end of file
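The deleted SamplerCache template implemented a common memoization idiom: key the container by the raw descriptor bits and only construct a backend object on a miss, which try_emplace's bool result reports directly. A standalone sketch of that idiom (the Key and Sampler types here are illustrative stand-ins, not the originals):

#include <cstddef>
#include <cstdint>
#include <unordered_map>

// Hypothetical stand-ins for Tegra::Texture::TSCEntry and a backend sampler.
struct Key {
    std::uint64_t raw;
    bool operator==(const Key& rhs) const { return raw == rhs.raw; }
};
struct KeyHash {
    std::size_t operator()(const Key& k) const noexcept { return k.raw; }
};
struct Sampler {
    int id = 0;
};

// try_emplace returns {iterator, inserted}, so the expensive creation
// only runs when the descriptor has not been seen before.
class SamplerCache {
public:
    const Sampler& GetSampler(Key key) {
        const auto [it, is_cache_miss] = cache.try_emplace(key);
        if (is_cache_miss) {
            it->second = Sampler{next_id++}; // stands in for CreateSampler(tsc)
        }
        return it->second;
    }

private:
    std::unordered_map<Key, Sampler, KeyHash> cache;
    int next_id = 1;
};

int main() {
    SamplerCache cache;
    (void)cache.GetSampler({0x1234});
    (void)cache.GetSampler({0x1234}); // hits the cached entry
}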
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 78245473c..9707136e9 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -134,13 +134,12 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
 }
 
 void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
-                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
                                      Vulkan::VKDescriptorPool& descriptor_pool,
                                      Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
-                                     Vulkan::VKRenderPassCache& renderpass_cache,
                                      std::vector<VkDescriptorSetLayoutBinding> bindings,
                                      Vulkan::SPIRVProgram program,
-                                     Vulkan::GraphicsPipelineCacheKey key) {
+                                     Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
     std::unique_lock lock(queue_mutex);
     pending_queue.push({
         .backend = Backend::Vulkan,
@@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
         .scheduler = &scheduler,
         .descriptor_pool = &descriptor_pool,
         .update_descriptor_queue = &update_descriptor_queue,
-        .renderpass_cache = &renderpass_cache,
         .bindings = std::move(bindings),
         .program = std::move(program),
         .key = key,
+        .num_color_buffers = num_color_buffers,
     });
     cv.notify_one();
 }
@@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         } else if (work.backend == Backend::Vulkan) {
             auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
                 *work.vk_device, *work.scheduler, *work.descriptor_pool,
-                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
-                work.program);
+                *work.update_descriptor_queue, work.key, work.bindings, work.program,
+                work.num_color_buffers);
 
             work.pp_cache->EmplacePipeline(std::move(pipeline));
         }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 5a7216019..0dbb1a31f 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -24,9 +24,9 @@
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/vulkan_common/vulkan_device.h"
 
 namespace Core::Frontend {
 class EmuWindow;
@@ -94,13 +94,13 @@ public:
                            CompilerSettings compiler_settings, const Registry& registry,
                            VAddr cpu_addr);
 
-    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
                            Vulkan::VKScheduler& scheduler,
                            Vulkan::VKDescriptorPool& descriptor_pool,
                            Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
-                           Vulkan::VKRenderPassCache& renderpass_cache,
                            std::vector<VkDescriptorSetLayoutBinding> bindings,
-                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
+                           u32 num_color_buffers);
 
 private:
     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
@@ -123,14 +123,14 @@ private:
 
         // For Vulkan
         Vulkan::VKPipelineCache* pp_cache;
-        const Vulkan::VKDevice* vk_device;
+        const Vulkan::Device* vk_device;
         Vulkan::VKScheduler* scheduler;
         Vulkan::VKDescriptorPool* descriptor_pool;
         Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
-        Vulkan::VKRenderPassCache* renderpass_cache;
         std::vector<VkDescriptorSetLayoutBinding> bindings;
         Vulkan::SPIRVProgram program;
         Vulkan::GraphicsPipelineCacheKey key;
+        u32 num_color_buffers;
     };
 
     std::condition_variable cv;
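QueueVulkanShader above follows the usual producer/consumer discipline: push under queue_mutex, wake a worker with cv.notify_one(), and do the expensive compile outside the lock. A runnable sketch of that discipline under the same member names where they appear above (WorkItem is a hypothetical stand-in for the queued shader parameters):

#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>

// Hypothetical stand-in for the queued shader parameters.
struct WorkItem {
    int key = 0;
};

std::mutex queue_mutex;
std::condition_variable cv;
std::queue<WorkItem> pending_queue;
bool stopping = false;

void QueueWork(int key) {
    std::unique_lock lock(queue_mutex);
    pending_queue.push(WorkItem{key});
    cv.notify_one();
}

void CompilerThread() {
    for (;;) {
        std::unique_lock lock(queue_mutex);
        cv.wait(lock, [] { return stopping || !pending_queue.empty(); });
        if (pending_queue.empty()) {
            return; // only reached once stopping is set and the queue is drained
        }
        const WorkItem work = pending_queue.front();
        pending_queue.pop();
        lock.unlock();
        // Compile the pipeline here, outside the lock, then hand it back.
        static_cast<void>(work);
    }
}

int main() {
    std::thread worker{CompilerThread};
    QueueWork(42);
    {
        std::unique_lock lock(queue_mutex);
        stopping = true;
    }
    cv.notify_all();
    worker.join();
}

Draining the queue before honoring the stop flag keeps late submissions from being dropped on shutdown.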
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index ab14c1aa3..6576d1208 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -25,7 +25,7 @@ using Tegra::Shader::OpCode;
 namespace {
 
 void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
-                              const std::list<Sampler>& used_samplers) {
+                              const std::list<SamplerEntry>& used_samplers) {
     if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
         return;
     }
@@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
     }
 }
 
-std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
+std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
                                         VideoCore::GuestDriverProfile& gpu_driver,
-                                        const std::list<Sampler>& used_samplers) {
+                                        const std::list<SamplerEntry>& used_samplers) {
     const u32 base_offset = sampler_to_deduce.offset;
     u32 max_offset{std::numeric_limits<u32>::max()};
     for (const auto& sampler : used_samplers) {
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index b2e88fa20..fa83108cd 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -22,13 +22,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    PredCondition cond;
-    bool bf;
-    bool ftz;
-    bool neg_a;
-    bool abs_a;
-    bool neg_b;
-    bool abs_b;
+    PredCondition cond{};
+    bool bf = false;
+    bool ftz = false;
+    bool neg_a = false;
+    bool abs_a = false;
+    bool neg_b = false;
+    bool abs_b = false;
     switch (opcode->get().GetId()) {
     case OpCode::Id::HSET2_C:
     case OpCode::Id::HSET2_IMM:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 532f66d27..5470e8cf4 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
+ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
     const auto offset = static_cast<u32>(image.index.Value());
 
-    const auto it = std::find_if(std::begin(used_images), std::end(used_images),
-                                 [offset](const Image& entry) { return entry.offset == offset; });
+    const auto it =
+        std::find_if(std::begin(used_images), std::end(used_images),
+                     [offset](const ImageEntry& entry) { return entry.offset == offset; });
     if (it != std::end(used_images)) {
         ASSERT(!it->is_bindless && it->type == type);
         return *it;
@@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t
     return used_images.emplace_back(next_index, offset, type);
 }
 
-Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
+ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
     const Node image_register = GetRegister(reg);
     const auto result =
         TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
@@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im
     const auto offset = std::get<2>(result);
 
     const auto it = std::find_if(std::begin(used_images), std::end(used_images),
-                                 [buffer, offset](const Image& entry) {
+                                 [buffer, offset](const ImageEntry& entry) {
                                      return entry.buffer == buffer && entry.offset == offset;
                                  });
     if (it != std::end(used_images)) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index fb18f631f..833fa2a39 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
     SamplerInfo info;
     info.is_shadow = is_depth_compare;
-    const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
+    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
@@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     SamplerInfo info;
     info.type = texture_type;
     info.is_array = is_array;
-    const std::optional<Sampler> sampler = is_bindless
-                                               ? GetBindlessSampler(base_reg, info, index_var)
-                                               : GetSampler(instr.sampler, info);
+    const std::optional<SamplerEntry> sampler =
+        is_bindless ? GetBindlessSampler(base_reg, info, index_var)
+                    : GetSampler(instr.sampler, info);
     Node4 values;
     if (!sampler) {
         std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
@@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         [[fallthrough]];
     case OpCode::Id::TXQ: {
         Node index_var;
-        const std::optional<Sampler> sampler = is_bindless
-                                                   ? GetBindlessSampler(instr.gpr8, {}, index_var)
-                                                   : GetSampler(instr.sampler, {});
+        const std::optional<SamplerEntry> sampler =
+            is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
+                        : GetSampler(instr.sampler, {});
 
         if (!sampler) {
             u32 indexer = 0;
@@ -272,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         info.type = texture_type;
         info.is_array = is_array;
         Node index_var;
-        const std::optional<Sampler> sampler =
+        const std::optional<SamplerEntry> sampler =
             is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
                         : GetSampler(instr.sampler, info);
 
@@ -379,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
     return info;
 }
 
-std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
-                                            SamplerInfo sampler_info) {
+std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
+                                                 SamplerInfo sampler_info) {
     const u32 offset = static_cast<u32>(sampler.index.Value());
     const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
 
     // If this sampler has already been used, return the existing mapping.
-    const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
-                                 [offset](const Sampler& entry) { return entry.offset == offset; });
+    const auto it =
+        std::find_if(used_samplers.begin(), used_samplers.end(),
+                     [offset](const SamplerEntry& entry) { return entry.offset == offset; });
     if (it != used_samplers.end()) {
         ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -399,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
                         *info.is_shadow, *info.is_buffer, false);
 }
 
-std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
-                                                    Node& index_var) {
+std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+                                                         SamplerInfo info, Node& index_var) {
     const Node sampler_register = GetRegister(reg);
     const auto [base_node, tracked_sampler_info] =
         TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
@@ -416,7 +417,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
 
     // If this sampler has already been used, return the existing mapping.
     const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
-                                 [buffer, offset](const Sampler& entry) {
+                                 [buffer, offset](const SamplerEntry& entry) {
                                      return entry.buffer == buffer && entry.offset == offset;
                                  });
     if (it != used_samplers.end()) {
@@ -436,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
 
         // Try to use an already created sampler if it exists
-        const auto it = std::find_if(
-            used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
-                return offsets == std::pair{entry.offset, entry.secondary_offset} &&
-                       indices == std::pair{entry.buffer, entry.secondary_buffer};
-            });
+        const auto it =
+            std::find_if(used_samplers.begin(), used_samplers.end(),
+                         [indices, offsets](const SamplerEntry& entry) {
+                             return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+                                    indices == std::pair{entry.buffer, entry.secondary_buffer};
+                         });
         if (it != used_samplers.end()) {
             ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
                    it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -460,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(
             used_samplers.begin(), used_samplers.end(),
-            [base_offset](const Sampler& entry) { return entry.offset == base_offset; });
+            [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
         if (it != used_samplers.end()) {
             ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                    it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
@@ -565,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
     info.is_buffer = false;
 
     Node index_var;
-    const std::optional<Sampler> sampler = is_bindless
-                                               ? GetBindlessSampler(*bindless_reg, info, index_var)
-                                               : GetSampler(instr.sampler, info);
+    const std::optional<SamplerEntry> sampler =
+        is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
+                    : GetSampler(instr.sampler, info);
     if (!sampler) {
         return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
     }
@@ -724,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     info.is_shadow = depth_compare;
 
     Node index_var;
-    const std::optional<Sampler> sampler =
+    const std::optional<SamplerEntry> sampler =
         is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
                     : GetSampler(instr.sampler, info);
     Node4 values;
@@ -783,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
 
-    const std::optional<Sampler> sampler = GetSampler(instr.sampler, {});
+    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
@@ -800,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     info.type = texture_type;
     info.is_array = is_array;
     info.is_shadow = false;
-    const std::optional<Sampler> sampler = GetSampler(instr.sampler, info);
+    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
 
     const std::size_t type_coord_count = GetCoordCount(texture_type);
     const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 8db9e1de7..b54d33763 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -282,25 +282,24 @@ struct SeparateSamplerNode;
 using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
 using TrackSampler = std::shared_ptr<TrackSamplerData>;
 
-struct Sampler {
+struct SamplerEntry {
     /// Bound samplers constructor
-    constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_,
-                               bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
+    explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
+                          bool is_shadow_, bool is_buffer_, bool is_indexed_)
         : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
           is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
 
     /// Separate sampler constructor
-    constexpr explicit Sampler(u32 index_, std::pair<u32, u32> offsets_,
-                               std::pair<u32, u32> buffers_, Tegra::Shader::TextureType type_,
-                               bool is_array_, bool is_shadow_, bool is_buffer_)
-        : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second},
-          buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_},
-          is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
+    explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+                          Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
+                          bool is_buffer_)
+        : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
+          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
+          is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
 
     /// Bindless samplers constructor
-    constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_,
-                               Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
-                               bool is_buffer_, bool is_indexed_)
+    explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
+                          bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
         : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
           is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
     }
@@ -340,14 +339,14 @@ struct BindlessSamplerNode {
     u32 offset;
 };
 
-struct Image {
+struct ImageEntry {
 public:
     /// Bound images constructor
-    constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
+    explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
         : index{index_}, offset{offset_}, type{type_} {}
 
     /// Bindless samplers constructor
-    constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
+    explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
         : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
 
     void MarkWrite() {
@@ -391,7 +390,7 @@ struct MetaArithmetic {
 
 /// Parameters describing a texture sampler
 struct MetaTexture {
-    Sampler sampler;
+    SamplerEntry sampler;
     Node array;
     Node depth_compare;
     std::vector<Node> aoffi;
@@ -405,7 +404,7 @@ struct MetaTexture {
 };
 
 struct MetaImage {
-    const Image& image;
+    const ImageEntry& image;
     std::vector<Node> values;
     u32 element{};
 };
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6aae14e34..0c6ab0f07 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -94,11 +94,11 @@ public:
94 return used_cbufs; 94 return used_cbufs;
95 } 95 }
96 96
97 const std::list<Sampler>& GetSamplers() const { 97 const std::list<SamplerEntry>& GetSamplers() const {
98 return used_samplers; 98 return used_samplers;
99 } 99 }
100 100
101 const std::list<Image>& GetImages() const { 101 const std::list<ImageEntry>& GetImages() const {
102 return used_images; 102 return used_images;
103 } 103 }
104 104
@@ -334,17 +334,17 @@ private:
                                std::optional<Tegra::Engines::SamplerDescriptor> sampler);
 
     /// Accesses a texture sampler.
-    std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
+    std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
 
     /// Accesses a texture sampler for a bindless texture.
-    std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
-                                              Node& index_var);
+    std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
+                                                   Node& index_var);
 
     /// Accesses an image.
-    Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
+    ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
 
     /// Access a bindless image sampler.
-    Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
+    ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
 
     /// Extracts a sequence of bits from a node
     Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -454,8 +454,8 @@ private:
     std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
     std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
     std::map<u32, ConstBuffer> used_cbufs;
-    std::list<Sampler> used_samplers;
-    std::list<Image> used_images;
+    std::list<SamplerEntry> used_samplers;
+    std::list<ImageEntry> used_images;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
     std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
     bool uses_layer{};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 937e29d1e..6308aef94 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) {
 }
 
 std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
-    return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
+    return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
 }
 
 } // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index cfd12fa61..c40ab89d0 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -120,7 +120,7 @@ enum class PixelFormat {
     Max = MaxDepthStencilFormat,
     Invalid = 255,
 };
-static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
+constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
 
 enum class SurfaceType {
     ColorTexture = 0,
@@ -140,117 +140,7 @@ enum class SurfaceTarget {
     TextureCubeArray,
 };
 
-constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
-    0, // A8B8G8R8_UNORM
-    0, // A8B8G8R8_SNORM
-    0, // A8B8G8R8_SINT
-    0, // A8B8G8R8_UINT
-    0, // R5G6B5_UNORM
-    0, // B5G6R5_UNORM
-    0, // A1R5G5B5_UNORM
-    0, // A2B10G10R10_UNORM
-    0, // A2B10G10R10_UINT
-    0, // A1B5G5R5_UNORM
-    0, // R8_UNORM
-    0, // R8_SNORM
-    0, // R8_SINT
-    0, // R8_UINT
-    0, // R16G16B16A16_FLOAT
-    0, // R16G16B16A16_UNORM
-    0, // R16G16B16A16_SNORM
-    0, // R16G16B16A16_SINT
-    0, // R16G16B16A16_UINT
-    0, // B10G11R11_FLOAT
-    0, // R32G32B32A32_UINT
-    2, // BC1_RGBA_UNORM
-    2, // BC2_UNORM
-    2, // BC3_UNORM
-    2, // BC4_UNORM
-    2, // BC4_SNORM
-    2, // BC5_UNORM
-    2, // BC5_SNORM
-    2, // BC7_UNORM
-    2, // BC6H_UFLOAT
-    2, // BC6H_SFLOAT
-    2, // ASTC_2D_4X4_UNORM
-    0, // B8G8R8A8_UNORM
-    0, // R32G32B32A32_FLOAT
-    0, // R32G32B32A32_SINT
-    0, // R32G32_FLOAT
-    0, // R32G32_SINT
-    0, // R32_FLOAT
-    0, // R16_FLOAT
-    0, // R16_UNORM
-    0, // R16_SNORM
-    0, // R16_UINT
-    0, // R16_SINT
-    0, // R16G16_UNORM
-    0, // R16G16_FLOAT
-    0, // R16G16_UINT
-    0, // R16G16_SINT
-    0, // R16G16_SNORM
-    0, // R32G32B32_FLOAT
-    0, // A8B8G8R8_SRGB
-    0, // R8G8_UNORM
-    0, // R8G8_SNORM
-    0, // R8G8_SINT
-    0, // R8G8_UINT
-    0, // R32G32_UINT
-    0, // R16G16B16X16_FLOAT
-    0, // R32_UINT
-    0, // R32_SINT
-    2, // ASTC_2D_8X8_UNORM
-    2, // ASTC_2D_8X5_UNORM
-    2, // ASTC_2D_5X4_UNORM
-    0, // B8G8R8A8_SRGB
-    2, // BC1_RGBA_SRGB
-    2, // BC2_SRGB
-    2, // BC3_SRGB
-    2, // BC7_SRGB
-    0, // A4B4G4R4_UNORM
-    2, // ASTC_2D_4X4_SRGB
-    2, // ASTC_2D_8X8_SRGB
-    2, // ASTC_2D_8X5_SRGB
-    2, // ASTC_2D_5X4_SRGB
-    2, // ASTC_2D_5X5_UNORM
-    2, // ASTC_2D_5X5_SRGB
-    2, // ASTC_2D_10X8_UNORM
-    2, // ASTC_2D_10X8_SRGB
-    2, // ASTC_2D_6X6_UNORM
-    2, // ASTC_2D_6X6_SRGB
-    2, // ASTC_2D_10X10_UNORM
-    2, // ASTC_2D_10X10_SRGB
-    2, // ASTC_2D_12X12_UNORM
-    2, // ASTC_2D_12X12_SRGB
-    2, // ASTC_2D_8X6_UNORM
-    2, // ASTC_2D_8X6_SRGB
-    2, // ASTC_2D_6X5_UNORM
-    2, // ASTC_2D_6X5_SRGB
-    0, // E5B9G9R9_FLOAT
-    0, // D32_FLOAT
-    0, // D16_UNORM
-    0, // D24_UNORM_S8_UINT
-    0, // S8_UINT_D24_UNORM
-    0, // D32_FLOAT_S8_UINT
-}};
-
-/**
- * Gets the compression factor for the specified PixelFormat. This applies to just the
- * "compressed width" and "compressed height", not the overall compression factor of a
- * compressed image. This is used for maintaining proper surface sizes for compressed
- * texture formats.
- */
-inline constexpr u32 GetCompressionFactorShift(PixelFormat format) {
-    DEBUG_ASSERT(format != PixelFormat::Invalid);
-    DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size());
-    return compression_factor_shift_table[static_cast<std::size_t>(format)];
-}
-
-inline constexpr u32 GetCompressionFactor(PixelFormat format) {
-    return 1U << GetCompressionFactorShift(format);
-}
-
-constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
+constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
     1, // A8B8G8R8_UNORM
     1, // A8B8G8R8_SNORM
     1, // A8B8G8R8_SINT
@@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
     1, // D32_FLOAT_S8_UINT
 }};
 
-static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
-    if (format == PixelFormat::Invalid)
-        return 0;
-
-    ASSERT(static_cast<std::size_t>(format) < block_width_table.size());
-    return block_width_table[static_cast<std::size_t>(format)];
+constexpr u32 DefaultBlockWidth(PixelFormat format) {
+    ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size());
+    return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)];
 }
 
-constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
+constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
     1, // A8B8G8R8_UNORM
     1, // A8B8G8R8_SNORM
     1, // A8B8G8R8_SINT
@@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
     1, // D32_FLOAT_S8_UINT
 }};
 
-static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
-    if (format == PixelFormat::Invalid)
-        return 0;
-
-    ASSERT(static_cast<std::size_t>(format) < block_height_table.size());
-    return block_height_table[static_cast<std::size_t>(format)];
+constexpr u32 DefaultBlockHeight(PixelFormat format) {
+    ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size());
+    return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)];
 }
 
-constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
+constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
     32, // A8B8G8R8_UNORM
     32, // A8B8G8R8_SNORM
     32, // A8B8G8R8_SINT
@@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
     64, // D32_FLOAT_S8_UINT
 }};
 
-static constexpr u32 GetFormatBpp(PixelFormat format) {
-    if (format == PixelFormat::Invalid)
-        return 0;
-
-    ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
-    return bpp_table[static_cast<std::size_t>(format)];
+constexpr u32 BitsPerBlock(PixelFormat format) {
+    ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size());
+    return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)];
 }
 
 /// Returns the sizer in bytes of the specified pixel format
-static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
-    if (pixel_format == PixelFormat::Invalid) {
-        return 0;
-    }
-    return GetFormatBpp(pixel_format) / CHAR_BIT;
+constexpr u32 BytesPerBlock(PixelFormat pixel_format) {
+    return BitsPerBlock(pixel_format) / CHAR_BIT;
 }
 
 SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
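The renamed helpers make the block-based size math explicit: a mip level occupies ceil(width / block_width) * ceil(height / block_height) blocks at BytesPerBlock bytes each. A worked check of that arithmetic, with illustrative table values (all ASTC blocks are 128 bits, i.e. 16 bytes):

#include <cstdint>

constexpr std::uint32_t DivCeil(std::uint32_t num, std::uint32_t den) {
    return (num + den - 1) / den;
}

// Bytes occupied by one mip level, counted in whole blocks.
constexpr std::uint32_t LevelSizeBytes(std::uint32_t width, std::uint32_t height,
                                       std::uint32_t block_width, std::uint32_t block_height,
                                       std::uint32_t bytes_per_block) {
    return DivCeil(width, block_width) * DivCeil(height, block_height) * bytes_per_block;
}

// ASTC_2D_8X8: 8x8 block extent, 16 bytes per block, so a 67x35 level
// rounds up to 9x5 = 45 blocks = 720 bytes.
static_assert(LevelSizeBytes(67, 35, 8, 8, 16) == 720);
// A8B8G8R8_UNORM: 1x1 blocks of 32 bits = 4 bytes, the uncompressed case.
static_assert(LevelSizeBytes(64, 64, 1, 1, 4) == 16384);

int main() {}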
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
new file mode 100644
index 000000000..a4fc1184b
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bit>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "common/div_ceil.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"
+#include "video_core/texture_cache/util.h"
+#include "video_core/textures/decoders.h"
+
+namespace VideoCommon::Accelerated {
+
+using Tegra::Texture::GOB_SIZE_SHIFT;
+using Tegra::Texture::GOB_SIZE_X;
+using Tegra::Texture::GOB_SIZE_X_SHIFT;
+using Tegra::Texture::GOB_SIZE_Y_SHIFT;
+using VideoCore::Surface::BytesPerBlock;
+
+BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
+                                                          const ImageInfo& info) {
+    const Extent3D block = swizzle.block;
+    const Extent3D num_tiles = swizzle.num_tiles;
+    const u32 bytes_per_block = BytesPerBlock(info.format);
+    const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
+    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
+    const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+    return BlockLinearSwizzle2DParams{
+        .origin{0, 0, 0},
+        .destination{0, 0, 0},
+        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
+        .layer_stride = info.layer_stride,
+        .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
+        .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
+        .block_height = block.height,
+        .block_height_mask = (1U << block.height) - 1,
+    };
+}
+
+BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
+                                                          const ImageInfo& info) {
+    const Extent3D block = swizzle.block;
+    const Extent3D num_tiles = swizzle.num_tiles;
+    const u32 bytes_per_block = BytesPerBlock(info.format);
+    const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
+    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
+
+    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
+    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
+    const u32 slice_size =
+        Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
+    return BlockLinearSwizzle3DParams{
+        .origin{0, 0, 0},
+        .destination{0, 0, 0},
+        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
+        .slice_size = slice_size,
+        .block_size = block_size,
+        .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
+        .block_height = block.height,
+        .block_height_mask = (1U << block.height) - 1,
+        .block_depth = block.depth,
+        .block_depth_mask = (1U << block.depth) - 1,
+    };
+}
+
+} // namespace VideoCommon::Accelerated
\ No newline at end of file
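The params built above feed a compute shader that linearizes block-linear (GOB-tiled) addresses. A simplified CPU-side sketch of the address math those fields imply (whole block rows, then the GOB within a block, then the block column), with the intra-GOB swizzle omitted; the real shader resolves that last step with a lookup table:

#include <cstdint>

// GOB geometry on Tegra: 64-byte-wide, 8-row, 512-byte tiles.
constexpr std::uint32_t GOB_SIZE_X_SHIFT = 6;
constexpr std::uint32_t GOB_SIZE_Y_SHIFT = 3;
constexpr std::uint32_t GOB_SIZE_SHIFT = 9;

struct Swizzle2DParams {
    std::uint32_t block_size;        // bytes spanned by one full row of blocks
    std::uint32_t x_shift;           // GOB_SIZE_SHIFT + block.height + block.depth
    std::uint32_t block_height;      // log2 of GOBs stacked per block in Y
    std::uint32_t block_height_mask; // (1 << block_height) - 1
};

// Byte offset of the GOB containing byte (x, y) of the image.
std::uint32_t GobOffset(std::uint32_t x, std::uint32_t y, const Swizzle2DParams& p) {
    const std::uint32_t gob_y = y >> GOB_SIZE_Y_SHIFT;
    std::uint32_t offset = 0;
    offset += (gob_y >> p.block_height) * p.block_size;        // whole block rows above
    offset += (gob_y & p.block_height_mask) << GOB_SIZE_SHIFT; // GOB within the block
    offset += (x >> GOB_SIZE_X_SHIFT) << p.x_shift;            // block column to the left
    return offset;
}

int main() {
    // Values as MakeBlockLinearSwizzle2DParams would derive them for
    // block.height = 4, block.depth = 0 and a one-GOB-wide stride.
    const Swizzle2DParams params{
        .block_size = 1u << 13,
        .x_shift = 13,
        .block_height = 4,
        .block_height_mask = 15,
    };
    // x = 64 selects the second block column (8192), y = 8 the second GOB (512).
    return GobOffset(64, 8, params) == 8704u ? 0 : 1;
}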
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
new file mode 100644
index 000000000..6ec5c78c4
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -0,0 +1,45 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/image_info.h"
+#include "video_core/texture_cache/types.h"
+
+namespace VideoCommon::Accelerated {
+
+struct BlockLinearSwizzle2DParams {
+    std::array<u32, 3> origin;
+    std::array<s32, 3> destination;
+    u32 bytes_per_block_log2;
+    u32 layer_stride;
+    u32 block_size;
+    u32 x_shift;
+    u32 block_height;
+    u32 block_height_mask;
+};
+
+struct BlockLinearSwizzle3DParams {
+    std::array<u32, 3> origin;
+    std::array<s32, 3> destination;
+    u32 bytes_per_block_log2;
+    u32 slice_size;
+    u32 block_size;
+    u32 x_shift;
+    u32 block_height;
+    u32 block_height_mask;
+    u32 block_depth;
+    u32 block_depth_mask;
+};
+
+[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
+    const SwizzleParameters& swizzle, const ImageInfo& info);
+
+[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
+    const SwizzleParameters& swizzle, const ImageInfo& info);
+
+} // namespace VideoCommon::Accelerated
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
deleted file mode 100644
index 5b475fe06..000000000
--- a/src/video_core/texture_cache/copy_params.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace VideoCommon {
-
-struct CopyParams {
-    constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_,
-                         u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_,
-                         u32 depth_)
-        : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_},
-          dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_},
-          dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {}
-
-    constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_)
-        : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_},
-          dest_level{level_}, width{width_}, height{height_}, depth{depth_} {}
-
-    u32 source_x;
-    u32 source_y;
-    u32 source_z;
-    u32 dest_x;
-    u32 dest_y;
-    u32 dest_z;
-    u32 source_level;
-    u32 dest_level;
-    u32 width;
-    u32 height;
-    u32 depth;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
new file mode 100644
index 000000000..017327975
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -0,0 +1,97 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <span>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/texture_cache/decode_bc4.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
17[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
18 const u32 code_offset = 16 + 3 * (4 * y + x);
19 const u32 code = (bits >> code_offset) & 7;
20 const u32 red0 = (bits >> 0) & 0xff;
21 const u32 red1 = (bits >> 8) & 0xff;
22 if (red0 > red1) {
23 switch (code) {
24 case 0:
25 return red0;
26 case 1:
27 return red1;
28 case 2:
29 return (6 * red0 + 1 * red1) / 7;
30 case 3:
31 return (5 * red0 + 2 * red1) / 7;
32 case 4:
33 return (4 * red0 + 3 * red1) / 7;
34 case 5:
35 return (3 * red0 + 4 * red1) / 7;
36 case 6:
37 return (2 * red0 + 5 * red1) / 7;
38 case 7:
39 return (1 * red0 + 6 * red1) / 7;
40 }
41 } else {
42 switch (code) {
43 case 0:
44 return red0;
45 case 1:
46 return red1;
47 case 2:
48 return (4 * red0 + 1 * red1) / 5;
49 case 3:
50 return (3 * red0 + 2 * red1) / 5;
51 case 4:
52 return (2 * red0 + 3 * red1) / 5;
53 case 5:
54 return (1 * red0 + 4 * red1) / 5;
55 case 6:
56 return 0;
57 case 7:
58 return 0xff;
59 }
60 }
61 return 0;
62}
63
64void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
65 UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
66 UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
67 static constexpr u32 BLOCK_SIZE = 4;
68 size_t input_offset = 0;
69 for (u32 slice = 0; slice < extent.depth; ++slice) {
 70        for (u32 block_y = 0; block_y < extent.height / BLOCK_SIZE; ++block_y) {
 71            for (u32 block_x = 0; block_x < extent.width / BLOCK_SIZE; ++block_x) {
72 u64 bits;
73 std::memcpy(&bits, &input[input_offset], sizeof(bits));
74 input_offset += sizeof(bits);
75
76 for (u32 y = 0; y < BLOCK_SIZE; ++y) {
77 for (u32 x = 0; x < BLOCK_SIZE; ++x) {
78 const u32 linear_z = slice;
79 const u32 linear_y = block_y * BLOCK_SIZE + y;
80 const u32 linear_x = block_x * BLOCK_SIZE + x;
81 const u32 offset_z = linear_z * extent.width * extent.height;
82 const u32 offset_y = linear_y * extent.width;
83 const u32 offset_x = linear_x;
 84                    const size_t output_offset = (offset_z + offset_y + offset_x) * 4ULL;
85 const u32 color = DecompressBlock(bits, x, y);
86 output[output_offset + 0] = static_cast<u8>(color);
87 output[output_offset + 1] = 0;
88 output[output_offset + 2] = 0;
89 output[output_offset + 3] = 0xff;
90 }
91 }
92 }
93 }
94 }
95}
96
97} // namespace VideoCommon
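For reference, a worked example of DecompressBlock's two interpolation modes, with assumed endpoint values (a sketch, not part of the diff):

    // red0 = 200, red1 = 100: red0 > red1 selects the 8-color mode.
    //   code 2 -> (6 * 200 + 1 * 100) / 7 = 185
    //   code 5 -> (3 * 200 + 4 * 100) / 7 = 142
    // red0 = 100, red1 = 200: the 6-color mode adds the two constants.
    //   code 2 -> (4 * 100 + 1 * 200) / 5 = 120
    //   code 6 -> 0, code 7 -> 0xff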
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);
15
16} // namespace VideoCommon
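A minimal call sketch, assuming a hypothetical 8x4x1 BC4 image (the names and sizes are illustrative, not from the diff); each 4x4 tile consumes one 8-byte block and each texel produces four bytes of RGBA8 output:

    using namespace VideoCommon;

    const Extent3D extent{.width = 8, .height = 4, .depth = 1};
    const std::vector<u8> compressed(2 * sizeof(u64)); // two 4x4 blocks
    std::vector<u8> decompressed(extent.width * extent.height * extent.depth * 4);
    DecompressBC4(compressed, extent, decompressed);
    // Only the red channel carries data; G = B = 0 and A = 0xff per texel.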
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/div_ceil.h"
12#include "common/logging/log.h"
13#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h"
15
16namespace VideoCommon {
17
18template <typename Descriptor>
19class DescriptorTable {
20public:
21 explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
22
 23    [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
 24        if (current_gpu_addr == gpu_addr && current_limit == limit) [[likely]] {
25 return false;
26 }
27 Refresh(gpu_addr, limit);
28 return true;
29 }
30
31 void Invalidate() noexcept {
32 std::ranges::fill(read_descriptors, 0);
33 }
34
35 [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
36 DEBUG_ASSERT(index <= current_limit);
37 const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
38 std::pair<Descriptor, bool> result;
39 gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
40 if (IsDescriptorRead(index)) {
41 result.second = result.first != descriptors[index];
42 } else {
43 MarkDescriptorAsRead(index);
44 result.second = true;
45 }
46 if (result.second) {
47 descriptors[index] = result.first;
48 }
49 return result;
50 }
51
52 [[nodiscard]] u32 Limit() const noexcept {
53 return current_limit;
54 }
55
56private:
57 void Refresh(GPUVAddr gpu_addr, u32 limit) {
58 current_gpu_addr = gpu_addr;
59 current_limit = limit;
60
61 const size_t num_descriptors = static_cast<size_t>(limit) + 1;
62 read_descriptors.clear();
63 read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
64 descriptors.resize(num_descriptors);
65 }
66
67 void MarkDescriptorAsRead(u32 index) noexcept {
68 read_descriptors[index / 64] |= 1ULL << (index % 64);
69 }
70
71 [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
72 return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
73 }
74
75 Tegra::MemoryManager& gpu_memory;
76 GPUVAddr current_gpu_addr{};
77 u32 current_limit{};
78 std::vector<u64> read_descriptors;
79 std::vector<Descriptor> descriptors;
80};
81
82} // namespace VideoCommon
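A sketch of the intended call pattern, assuming a Tegra::Texture::TICEntry descriptor and hypothetical tic_gpu_addr/tic_limit values read from the engine registers:

    VideoCommon::DescriptorTable<Tegra::Texture::TICEntry> table{gpu_memory};

    if (table.Synchronize(tic_gpu_addr, tic_limit)) {
        // The table was remapped or resized; all cached entries are stale.
    }
    const auto [descriptor, changed] = table.Read(index);
    if (changed) {
        // First read of this slot, or its contents differ from the cached copy.
    }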
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7938d71eb..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include "common/common_types.h" 5#include "common/common_types.h"
7#include "common/logging/log.h" 6#include "common/logging/log.h"
8#include "video_core/texture_cache/format_lookup_table.h" 7#include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
20constexpr auto SINT = ComponentType::SINT; 19constexpr auto SINT = ComponentType::SINT;
21constexpr auto UINT = ComponentType::UINT; 20constexpr auto UINT = ComponentType::UINT;
22constexpr auto FLOAT = ComponentType::FLOAT; 21constexpr auto FLOAT = ComponentType::FLOAT;
23constexpr bool C = false; // Normal color 22constexpr bool LINEAR = false;
24constexpr bool S = true; // Srgb 23constexpr bool SRGB = true;
25 24
26struct Table { 25constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
27 constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, 26 ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
28 ComponentType green_component_, ComponentType blue_component_, 27 u32 hash = is_srgb ? 1 : 0;
29 ComponentType alpha_component_, PixelFormat pixel_format_) 28 hash |= static_cast<u32>(red_component) << 1;
30 : texture_format{texture_format_}, pixel_format{pixel_format_}, 29 hash |= static_cast<u32>(green_component) << 4;
31 red_component{red_component_}, green_component{green_component_}, 30 hash |= static_cast<u32>(blue_component) << 7;
32 blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} 31 hash |= static_cast<u32>(alpha_component) << 10;
33 32 hash |= static_cast<u32>(format) << 13;
34 TextureFormat texture_format; 33 return hash;
35 PixelFormat pixel_format; 34}
36 ComponentType red_component;
37 ComponentType green_component;
38 ComponentType blue_component;
39 ComponentType alpha_component;
40 bool is_srgb;
41};
42constexpr std::array<Table, 86> DefinitionTable = {{
43 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
44 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
45 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
46 {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
47 {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
48
49 {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
50
51 {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
52 {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
53
54 {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
55
56 {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
57
58 {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
59 {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
60 {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
61 {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
62
63 {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
64 {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
65 {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
66 {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
67
68 {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
69 {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
70 {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
71 {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
72 {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
73
74 {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
75 {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
76 {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
77 {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
78 {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
79
80 {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
81 {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
82 {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
83 {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
84 {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
85
86 {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
87
88 {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
89 {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
90 {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
91
92 {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
93
94 {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
95 {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
96 {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
97
98 {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
99 {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
100 {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
101
102 {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
103
104 {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
105 {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
106 {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
107 {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
108 {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
109
110 {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
111 {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
112
113 {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
114 {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
115
116 {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
117 {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
118
119 {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
120 {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
121
122 {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
123 {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
124
125 {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
126 {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
127
128 {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
129 {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
130
131 {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
132 {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
133
134 {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
135 {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
136
137 {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
138 {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
139
140 {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
141 {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
142
143 {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
144 {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
145
146 {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
147 {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
148
149 {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
150 {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
151
152 {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
153 {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
154
155 {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
156 {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
157
158 {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
159 {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
160 35
161 {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, 36constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) {
162 {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, 37 return Hash(format, component, component, component, component, is_srgb);
163}}; 38}
164 39
165} // Anonymous namespace 40} // Anonymous namespace
166 41
167FormatLookupTable::FormatLookupTable() { 42PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
168 table.fill(static_cast<u8>(PixelFormat::Invalid)); 43 ComponentType blue, ComponentType alpha,
169 44 bool is_srgb) noexcept {
170 for (const auto& entry : DefinitionTable) { 45 switch (Hash(format, red, green, blue, alpha, is_srgb)) {
171 table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, 46 case Hash(TextureFormat::A8R8G8B8, UNORM):
172 entry.green_component, entry.blue_component, entry.alpha_component)] = 47 return PixelFormat::A8B8G8R8_UNORM;
173 static_cast<u8>(entry.pixel_format); 48 case Hash(TextureFormat::A8R8G8B8, SNORM):
174 } 49 return PixelFormat::A8B8G8R8_SNORM;
175} 50 case Hash(TextureFormat::A8R8G8B8, UINT):
176 51 return PixelFormat::A8B8G8R8_UINT;
177PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, 52 case Hash(TextureFormat::A8R8G8B8, SINT):
178 ComponentType red_component, 53 return PixelFormat::A8B8G8R8_SINT;
179 ComponentType green_component, 54 case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
180 ComponentType blue_component, 55 return PixelFormat::A8B8G8R8_SRGB;
181 ComponentType alpha_component) const noexcept { 56 case Hash(TextureFormat::B5G6R5, UNORM):
182 const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( 57 return PixelFormat::B5G6R5_UNORM;
183 format, is_srgb, red_component, green_component, blue_component, alpha_component)]); 58 case Hash(TextureFormat::A2B10G10R10, UNORM):
184 // [[likely]] 59 return PixelFormat::A2B10G10R10_UNORM;
185 if (pixel_format != PixelFormat::Invalid) { 60 case Hash(TextureFormat::A2B10G10R10, UINT):
186 return pixel_format; 61 return PixelFormat::A2B10G10R10_UINT;
62 case Hash(TextureFormat::A1B5G5R5, UNORM):
63 return PixelFormat::A1B5G5R5_UNORM;
64 case Hash(TextureFormat::A4B4G4R4, UNORM):
65 return PixelFormat::A4B4G4R4_UNORM;
66 case Hash(TextureFormat::R8, UNORM):
67 return PixelFormat::R8_UNORM;
68 case Hash(TextureFormat::R8, SNORM):
69 return PixelFormat::R8_SNORM;
70 case Hash(TextureFormat::R8, UINT):
71 return PixelFormat::R8_UINT;
72 case Hash(TextureFormat::R8, SINT):
73 return PixelFormat::R8_SINT;
74 case Hash(TextureFormat::R8G8, UNORM):
75 return PixelFormat::R8G8_UNORM;
76 case Hash(TextureFormat::R8G8, SNORM):
77 return PixelFormat::R8G8_SNORM;
78 case Hash(TextureFormat::R8G8, UINT):
79 return PixelFormat::R8G8_UINT;
80 case Hash(TextureFormat::R8G8, SINT):
81 return PixelFormat::R8G8_SINT;
82 case Hash(TextureFormat::R16G16B16A16, FLOAT):
83 return PixelFormat::R16G16B16A16_FLOAT;
84 case Hash(TextureFormat::R16G16B16A16, UNORM):
85 return PixelFormat::R16G16B16A16_UNORM;
86 case Hash(TextureFormat::R16G16B16A16, SNORM):
87 return PixelFormat::R16G16B16A16_SNORM;
88 case Hash(TextureFormat::R16G16B16A16, UINT):
89 return PixelFormat::R16G16B16A16_UINT;
90 case Hash(TextureFormat::R16G16B16A16, SINT):
91 return PixelFormat::R16G16B16A16_SINT;
92 case Hash(TextureFormat::R16G16, FLOAT):
93 return PixelFormat::R16G16_FLOAT;
94 case Hash(TextureFormat::R16G16, UNORM):
95 return PixelFormat::R16G16_UNORM;
96 case Hash(TextureFormat::R16G16, SNORM):
97 return PixelFormat::R16G16_SNORM;
98 case Hash(TextureFormat::R16G16, UINT):
99 return PixelFormat::R16G16_UINT;
100 case Hash(TextureFormat::R16G16, SINT):
101 return PixelFormat::R16G16_SINT;
102 case Hash(TextureFormat::R16, FLOAT):
103 return PixelFormat::R16_FLOAT;
104 case Hash(TextureFormat::R16, UNORM):
105 return PixelFormat::R16_UNORM;
106 case Hash(TextureFormat::R16, SNORM):
107 return PixelFormat::R16_SNORM;
108 case Hash(TextureFormat::R16, UINT):
109 return PixelFormat::R16_UINT;
110 case Hash(TextureFormat::R16, SINT):
111 return PixelFormat::R16_SINT;
112 case Hash(TextureFormat::B10G11R11, FLOAT):
113 return PixelFormat::B10G11R11_FLOAT;
114 case Hash(TextureFormat::R32G32B32A32, FLOAT):
115 return PixelFormat::R32G32B32A32_FLOAT;
116 case Hash(TextureFormat::R32G32B32A32, UINT):
117 return PixelFormat::R32G32B32A32_UINT;
118 case Hash(TextureFormat::R32G32B32A32, SINT):
119 return PixelFormat::R32G32B32A32_SINT;
120 case Hash(TextureFormat::R32G32B32, FLOAT):
121 return PixelFormat::R32G32B32_FLOAT;
122 case Hash(TextureFormat::R32G32, FLOAT):
123 return PixelFormat::R32G32_FLOAT;
124 case Hash(TextureFormat::R32G32, UINT):
125 return PixelFormat::R32G32_UINT;
126 case Hash(TextureFormat::R32G32, SINT):
127 return PixelFormat::R32G32_SINT;
128 case Hash(TextureFormat::R32, FLOAT):
129 return PixelFormat::R32_FLOAT;
130 case Hash(TextureFormat::R32, UINT):
131 return PixelFormat::R32_UINT;
132 case Hash(TextureFormat::R32, SINT):
133 return PixelFormat::R32_SINT;
134 case Hash(TextureFormat::E5B9G9R9, FLOAT):
135 return PixelFormat::E5B9G9R9_FLOAT;
136 case Hash(TextureFormat::D32, FLOAT):
137 return PixelFormat::D32_FLOAT;
138 case Hash(TextureFormat::D16, UNORM):
139 return PixelFormat::D16_UNORM;
140 case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
141 return PixelFormat::S8_UINT_D24_UNORM;
142 case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
143 return PixelFormat::S8_UINT_D24_UNORM;
144 case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
145 return PixelFormat::D32_FLOAT_S8_UINT;
146 case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
147 return PixelFormat::BC1_RGBA_UNORM;
148 case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
149 return PixelFormat::BC1_RGBA_SRGB;
150 case Hash(TextureFormat::BC2, UNORM, LINEAR):
151 return PixelFormat::BC2_UNORM;
152 case Hash(TextureFormat::BC2, UNORM, SRGB):
153 return PixelFormat::BC2_SRGB;
154 case Hash(TextureFormat::BC3, UNORM, LINEAR):
155 return PixelFormat::BC3_UNORM;
156 case Hash(TextureFormat::BC3, UNORM, SRGB):
157 return PixelFormat::BC3_SRGB;
158 case Hash(TextureFormat::BC4, UNORM):
159 return PixelFormat::BC4_UNORM;
160 case Hash(TextureFormat::BC4, SNORM):
161 return PixelFormat::BC4_SNORM;
162 case Hash(TextureFormat::BC5, UNORM):
163 return PixelFormat::BC5_UNORM;
164 case Hash(TextureFormat::BC5, SNORM):
165 return PixelFormat::BC5_SNORM;
166 case Hash(TextureFormat::BC7, UNORM, LINEAR):
167 return PixelFormat::BC7_UNORM;
168 case Hash(TextureFormat::BC7, UNORM, SRGB):
169 return PixelFormat::BC7_SRGB;
170 case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
171 return PixelFormat::BC6H_SFLOAT;
172 case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
173 return PixelFormat::BC6H_UFLOAT;
174 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
175 return PixelFormat::ASTC_2D_4X4_UNORM;
176 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
177 return PixelFormat::ASTC_2D_4X4_SRGB;
178 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR):
179 return PixelFormat::ASTC_2D_5X4_UNORM;
180 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB):
181 return PixelFormat::ASTC_2D_5X4_SRGB;
182 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR):
183 return PixelFormat::ASTC_2D_5X5_UNORM;
184 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB):
185 return PixelFormat::ASTC_2D_5X5_SRGB;
186 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR):
187 return PixelFormat::ASTC_2D_8X8_UNORM;
188 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB):
189 return PixelFormat::ASTC_2D_8X8_SRGB;
190 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR):
191 return PixelFormat::ASTC_2D_8X5_UNORM;
192 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB):
193 return PixelFormat::ASTC_2D_8X5_SRGB;
194 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR):
195 return PixelFormat::ASTC_2D_10X8_UNORM;
196 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB):
197 return PixelFormat::ASTC_2D_10X8_SRGB;
198 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR):
199 return PixelFormat::ASTC_2D_6X6_UNORM;
200 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
201 return PixelFormat::ASTC_2D_6X6_SRGB;
202 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
203 return PixelFormat::ASTC_2D_10X10_UNORM;
204 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
205 return PixelFormat::ASTC_2D_10X10_SRGB;
206 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
207 return PixelFormat::ASTC_2D_12X12_UNORM;
208 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
209 return PixelFormat::ASTC_2D_12X12_SRGB;
210 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR):
211 return PixelFormat::ASTC_2D_8X6_UNORM;
212 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB):
213 return PixelFormat::ASTC_2D_8X6_SRGB;
214 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR):
215 return PixelFormat::ASTC_2D_6X5_UNORM;
216 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
217 return PixelFormat::ASTC_2D_6X5_SRGB;
187 } 218 }
188 UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", 219 UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
189 static_cast<int>(format), is_srgb, static_cast<int>(red_component), 220 static_cast<int>(format), is_srgb, static_cast<int>(red),
190 static_cast<int>(green_component), static_cast<int>(blue_component), 221 static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
191 static_cast<int>(alpha_component));
192 return PixelFormat::A8B8G8R8_UNORM; 222 return PixelFormat::A8B8G8R8_UNORM;
193} 223}
194 224
195void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
196 ComponentType green_component, ComponentType blue_component,
197 ComponentType alpha_component, PixelFormat pixel_format) {}
198
199std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
200 ComponentType red_component,
201 ComponentType green_component,
202 ComponentType blue_component,
203 ComponentType alpha_component) noexcept {
204 const auto format_index = static_cast<std::size_t>(format);
205 const auto red_index = static_cast<std::size_t>(red_component);
206 const auto green_index = static_cast<std::size_t>(green_component);
207 const auto blue_index = static_cast<std::size_t>(blue_component);
208 const auto alpha_index = static_cast<std::size_t>(alpha_component);
209 const std::size_t srgb_index = is_srgb ? 1 : 0;
210
211 return format_index * PerFormat +
212 srgb_index * PerComponent * PerComponent * PerComponent * PerComponent +
213 alpha_index * PerComponent * PerComponent * PerComponent +
214 blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index;
215}
216
217} // namespace VideoCommon 225} // namespace VideoCommon
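The switch is well-formed because Hash packs each input into a disjoint bit field, so every case label evaluates to a distinct compile-time constant (assuming ComponentType values fit in 3 bits, which holds for the Tegra enum):

    // bit 0      : is_srgb
    // bits 1-3   : red ComponentType      bits 4-6   : green
    // bits 7-9   : blue                   bits 10-12 : alpha
    // bits 13+   : TextureFormat
    //
    // e.g. Hash(TextureFormat::A8R8G8B8, UNORM, SRGB) is
    //      1 | (UNORM << 1) | (UNORM << 4) | (UNORM << 7) | (UNORM << 10)
    //        | (A8R8G8B8 << 13)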
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index aa77e0a5a..729533999 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -4,48 +4,14 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <limits>
9#include "video_core/surface.h" 7#include "video_core/surface.h"
10#include "video_core/textures/texture.h" 8#include "video_core/textures/texture.h"
11 9
12namespace VideoCommon { 10namespace VideoCommon {
13 11
14class FormatLookupTable { 12VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo(
15public: 13 Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component,
16 explicit FormatLookupTable(); 14 Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component,
17 15 Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;
18 VideoCore::Surface::PixelFormat GetPixelFormat(
19 Tegra::Texture::TextureFormat format, bool is_srgb,
20 Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
21 Tegra::Texture::ComponentType blue_component,
22 Tegra::Texture::ComponentType alpha_component) const noexcept;
23
24private:
25 static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
26
27 static constexpr std::size_t NumTextureFormats = 128;
28
29 static constexpr std::size_t PerComponent = 8;
30 static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
31 static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
32 static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
33 static constexpr std::size_t PerFormat = PerComponents4 * 2;
34
35 static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
36 Tegra::Texture::ComponentType red_component,
37 Tegra::Texture::ComponentType green_component,
38 Tegra::Texture::ComponentType blue_component,
39 Tegra::Texture::ComponentType alpha_component) noexcept;
40
41 void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
42 Tegra::Texture::ComponentType red_component,
43 Tegra::Texture::ComponentType green_component,
44 Tegra::Texture::ComponentType blue_component,
45 Tegra::Texture::ComponentType alpha_component,
46 VideoCore::Surface::PixelFormat pixel_format);
47
48 std::array<u8, NumTextureFormats * PerFormat> table;
49};
50 16
51} // namespace VideoCommon 17} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
new file mode 100644
index 000000000..d10ba4ccd
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -0,0 +1,95 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7
8#include "video_core/texture_cache/formatter.h"
9#include "video_core/texture_cache/image_base.h"
10#include "video_core/texture_cache/image_info.h"
11#include "video_core/texture_cache/image_view_base.h"
12#include "video_core/texture_cache/render_targets.h"
13
14namespace VideoCommon {
15
16std::string Name(const ImageBase& image) {
17 const GPUVAddr gpu_addr = image.gpu_addr;
18 const ImageInfo& info = image.info;
19 const u32 width = info.size.width;
20 const u32 height = info.size.height;
21 const u32 depth = info.size.depth;
22 const u32 num_layers = image.info.resources.layers;
23 const u32 num_levels = image.info.resources.levels;
24 std::string resource;
25 if (num_layers > 1) {
26 resource += fmt::format(":L{}", num_layers);
27 }
28 if (num_levels > 1) {
29 resource += fmt::format(":M{}", num_levels);
30 }
31 switch (image.info.type) {
32 case ImageType::e1D:
33 return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
34 case ImageType::e2D:
35 return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
36 case ImageType::e3D:
 37        return fmt::format("Image 3D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
38 case ImageType::Linear:
39 return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
40 case ImageType::Buffer:
41 return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width);
42 }
43 return "Invalid";
44}
45
46std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
47 const u32 width = image_view.size.width;
48 const u32 height = image_view.size.height;
49 const u32 depth = image_view.size.depth;
50 const u32 num_levels = image_view.range.extent.levels;
51 const u32 num_layers = image_view.range.extent.layers;
52
53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
54 switch (type.value_or(image_view.type)) {
55 case ImageViewType::e1D:
56 return fmt::format("ImageView 1D {}{}", width, level);
57 case ImageViewType::e2D:
58 return fmt::format("ImageView 2D {}x{}{}", width, height, level);
59 case ImageViewType::Cube:
60 return fmt::format("ImageView Cube {}x{}{}", width, height, level);
61 case ImageViewType::e3D:
62 return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
63 case ImageViewType::e1DArray:
64 return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
65 case ImageViewType::e2DArray:
66 return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
67 case ImageViewType::CubeArray:
68 return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
69 case ImageViewType::Rect:
70 return fmt::format("ImageView Rect {}x{}{}", width, height, level);
71 case ImageViewType::Buffer:
72 return fmt::format("BufferView {}", width);
73 }
74 return "Invalid";
75}
76
77std::string Name(const RenderTargets& render_targets) {
78 std::string_view debug_prefix;
79 const auto num_color = std::ranges::count_if(
80 render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
81 if (render_targets.depth_buffer_id) {
82 debug_prefix = num_color > 0 ? "R" : "Z";
83 } else {
84 debug_prefix = num_color > 0 ? "C" : "X";
85 }
86 const Extent2D size = render_targets.size;
87 if (num_color > 0) {
88 return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
89 size.height);
90 } else {
91 return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
92 }
93}
94
95} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include <fmt/format.h>
10
11#include "video_core/surface.h"
12#include "video_core/texture_cache/types.h"
13
14template <>
15struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
16 template <typename FormatContext>
17 auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
18 using VideoCore::Surface::PixelFormat;
19 const string_view name = [format] {
20 switch (format) {
21 case PixelFormat::A8B8G8R8_UNORM:
22 return "A8B8G8R8_UNORM";
23 case PixelFormat::A8B8G8R8_SNORM:
24 return "A8B8G8R8_SNORM";
25 case PixelFormat::A8B8G8R8_SINT:
26 return "A8B8G8R8_SINT";
27 case PixelFormat::A8B8G8R8_UINT:
28 return "A8B8G8R8_UINT";
29 case PixelFormat::R5G6B5_UNORM:
30 return "R5G6B5_UNORM";
31 case PixelFormat::B5G6R5_UNORM:
32 return "B5G6R5_UNORM";
33 case PixelFormat::A1R5G5B5_UNORM:
34 return "A1R5G5B5_UNORM";
35 case PixelFormat::A2B10G10R10_UNORM:
36 return "A2B10G10R10_UNORM";
37 case PixelFormat::A2B10G10R10_UINT:
38 return "A2B10G10R10_UINT";
39 case PixelFormat::A1B5G5R5_UNORM:
40 return "A1B5G5R5_UNORM";
41 case PixelFormat::R8_UNORM:
42 return "R8_UNORM";
43 case PixelFormat::R8_SNORM:
44 return "R8_SNORM";
45 case PixelFormat::R8_SINT:
46 return "R8_SINT";
47 case PixelFormat::R8_UINT:
48 return "R8_UINT";
49 case PixelFormat::R16G16B16A16_FLOAT:
50 return "R16G16B16A16_FLOAT";
51 case PixelFormat::R16G16B16A16_UNORM:
52 return "R16G16B16A16_UNORM";
53 case PixelFormat::R16G16B16A16_SNORM:
54 return "R16G16B16A16_SNORM";
55 case PixelFormat::R16G16B16A16_SINT:
56 return "R16G16B16A16_SINT";
57 case PixelFormat::R16G16B16A16_UINT:
58 return "R16G16B16A16_UINT";
59 case PixelFormat::B10G11R11_FLOAT:
60 return "B10G11R11_FLOAT";
61 case PixelFormat::R32G32B32A32_UINT:
62 return "R32G32B32A32_UINT";
63 case PixelFormat::BC1_RGBA_UNORM:
64 return "BC1_RGBA_UNORM";
65 case PixelFormat::BC2_UNORM:
66 return "BC2_UNORM";
67 case PixelFormat::BC3_UNORM:
68 return "BC3_UNORM";
69 case PixelFormat::BC4_UNORM:
70 return "BC4_UNORM";
71 case PixelFormat::BC4_SNORM:
72 return "BC4_SNORM";
73 case PixelFormat::BC5_UNORM:
74 return "BC5_UNORM";
75 case PixelFormat::BC5_SNORM:
76 return "BC5_SNORM";
77 case PixelFormat::BC7_UNORM:
78 return "BC7_UNORM";
79 case PixelFormat::BC6H_UFLOAT:
80 return "BC6H_UFLOAT";
81 case PixelFormat::BC6H_SFLOAT:
82 return "BC6H_SFLOAT";
83 case PixelFormat::ASTC_2D_4X4_UNORM:
84 return "ASTC_2D_4X4_UNORM";
85 case PixelFormat::B8G8R8A8_UNORM:
86 return "B8G8R8A8_UNORM";
87 case PixelFormat::R32G32B32A32_FLOAT:
88 return "R32G32B32A32_FLOAT";
89 case PixelFormat::R32G32B32A32_SINT:
90 return "R32G32B32A32_SINT";
91 case PixelFormat::R32G32_FLOAT:
92 return "R32G32_FLOAT";
93 case PixelFormat::R32G32_SINT:
94 return "R32G32_SINT";
95 case PixelFormat::R32_FLOAT:
96 return "R32_FLOAT";
97 case PixelFormat::R16_FLOAT:
98 return "R16_FLOAT";
99 case PixelFormat::R16_UNORM:
100 return "R16_UNORM";
101 case PixelFormat::R16_SNORM:
102 return "R16_SNORM";
103 case PixelFormat::R16_UINT:
104 return "R16_UINT";
105 case PixelFormat::R16_SINT:
106 return "R16_SINT";
107 case PixelFormat::R16G16_UNORM:
108 return "R16G16_UNORM";
109 case PixelFormat::R16G16_FLOAT:
110 return "R16G16_FLOAT";
111 case PixelFormat::R16G16_UINT:
112 return "R16G16_UINT";
113 case PixelFormat::R16G16_SINT:
114 return "R16G16_SINT";
115 case PixelFormat::R16G16_SNORM:
116 return "R16G16_SNORM";
117 case PixelFormat::R32G32B32_FLOAT:
118 return "R32G32B32_FLOAT";
119 case PixelFormat::A8B8G8R8_SRGB:
120 return "A8B8G8R8_SRGB";
121 case PixelFormat::R8G8_UNORM:
122 return "R8G8_UNORM";
123 case PixelFormat::R8G8_SNORM:
124 return "R8G8_SNORM";
125 case PixelFormat::R8G8_SINT:
126 return "R8G8_SINT";
127 case PixelFormat::R8G8_UINT:
128 return "R8G8_UINT";
129 case PixelFormat::R32G32_UINT:
130 return "R32G32_UINT";
131 case PixelFormat::R16G16B16X16_FLOAT:
132 return "R16G16B16X16_FLOAT";
133 case PixelFormat::R32_UINT:
134 return "R32_UINT";
135 case PixelFormat::R32_SINT:
136 return "R32_SINT";
137 case PixelFormat::ASTC_2D_8X8_UNORM:
138 return "ASTC_2D_8X8_UNORM";
139 case PixelFormat::ASTC_2D_8X5_UNORM:
140 return "ASTC_2D_8X5_UNORM";
141 case PixelFormat::ASTC_2D_5X4_UNORM:
142 return "ASTC_2D_5X4_UNORM";
143 case PixelFormat::B8G8R8A8_SRGB:
144 return "B8G8R8A8_SRGB";
145 case PixelFormat::BC1_RGBA_SRGB:
146 return "BC1_RGBA_SRGB";
147 case PixelFormat::BC2_SRGB:
148 return "BC2_SRGB";
149 case PixelFormat::BC3_SRGB:
150 return "BC3_SRGB";
151 case PixelFormat::BC7_SRGB:
152 return "BC7_SRGB";
153 case PixelFormat::A4B4G4R4_UNORM:
154 return "A4B4G4R4_UNORM";
155 case PixelFormat::ASTC_2D_4X4_SRGB:
156 return "ASTC_2D_4X4_SRGB";
157 case PixelFormat::ASTC_2D_8X8_SRGB:
158 return "ASTC_2D_8X8_SRGB";
159 case PixelFormat::ASTC_2D_8X5_SRGB:
160 return "ASTC_2D_8X5_SRGB";
161 case PixelFormat::ASTC_2D_5X4_SRGB:
162 return "ASTC_2D_5X4_SRGB";
163 case PixelFormat::ASTC_2D_5X5_UNORM:
164 return "ASTC_2D_5X5_UNORM";
165 case PixelFormat::ASTC_2D_5X5_SRGB:
166 return "ASTC_2D_5X5_SRGB";
167 case PixelFormat::ASTC_2D_10X8_UNORM:
168 return "ASTC_2D_10X8_UNORM";
169 case PixelFormat::ASTC_2D_10X8_SRGB:
170 return "ASTC_2D_10X8_SRGB";
171 case PixelFormat::ASTC_2D_6X6_UNORM:
172 return "ASTC_2D_6X6_UNORM";
173 case PixelFormat::ASTC_2D_6X6_SRGB:
174 return "ASTC_2D_6X6_SRGB";
175 case PixelFormat::ASTC_2D_10X10_UNORM:
176 return "ASTC_2D_10X10_UNORM";
177 case PixelFormat::ASTC_2D_10X10_SRGB:
178 return "ASTC_2D_10X10_SRGB";
179 case PixelFormat::ASTC_2D_12X12_UNORM:
180 return "ASTC_2D_12X12_UNORM";
181 case PixelFormat::ASTC_2D_12X12_SRGB:
182 return "ASTC_2D_12X12_SRGB";
183 case PixelFormat::ASTC_2D_8X6_UNORM:
184 return "ASTC_2D_8X6_UNORM";
185 case PixelFormat::ASTC_2D_8X6_SRGB:
186 return "ASTC_2D_8X6_SRGB";
187 case PixelFormat::ASTC_2D_6X5_UNORM:
188 return "ASTC_2D_6X5_UNORM";
189 case PixelFormat::ASTC_2D_6X5_SRGB:
190 return "ASTC_2D_6X5_SRGB";
191 case PixelFormat::E5B9G9R9_FLOAT:
192 return "E5B9G9R9_FLOAT";
193 case PixelFormat::D32_FLOAT:
194 return "D32_FLOAT";
195 case PixelFormat::D16_UNORM:
196 return "D16_UNORM";
197 case PixelFormat::D24_UNORM_S8_UINT:
198 return "D24_UNORM_S8_UINT";
199 case PixelFormat::S8_UINT_D24_UNORM:
200 return "S8_UINT_D24_UNORM";
201 case PixelFormat::D32_FLOAT_S8_UINT:
202 return "D32_FLOAT_S8_UINT";
203 case PixelFormat::MaxDepthStencilFormat:
204 case PixelFormat::Invalid:
205 return "Invalid";
206 }
207 return "Invalid";
208 }();
209 return formatter<string_view>::format(name, ctx);
210 }
211};
212
213template <>
214struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> {
215 template <typename FormatContext>
216 auto format(VideoCommon::ImageType type, FormatContext& ctx) {
217 const string_view name = [type] {
218 using VideoCommon::ImageType;
219 switch (type) {
220 case ImageType::e1D:
221 return "1D";
222 case ImageType::e2D:
223 return "2D";
224 case ImageType::e3D:
225 return "3D";
226 case ImageType::Linear:
227 return "Linear";
228 case ImageType::Buffer:
229 return "Buffer";
230 }
231 return "Invalid";
232 }();
233 return formatter<string_view>::format(name, ctx);
234 }
235};
236
237template <>
238struct fmt::formatter<VideoCommon::Extent3D> {
239 constexpr auto parse(fmt::format_parse_context& ctx) {
240 return ctx.begin();
241 }
242
243 template <typename FormatContext>
244 auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) {
245 return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height,
246 extent.depth);
247 }
248};
249
250namespace VideoCommon {
251
252struct ImageBase;
253struct ImageViewBase;
254struct RenderTargets;
255
256[[nodiscard]] std::string Name(const ImageBase& image);
257
258[[nodiscard]] std::string Name(const ImageViewBase& image_view,
259 std::optional<ImageViewType> type = std::nullopt);
260
261[[nodiscard]] std::string Name(const RenderTargets& render_targets);
262
263} // namespace VideoCommon
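With these specializations visible, texture-cache types format directly through fmt; a minimal sketch:

    const VideoCommon::Extent3D extent{.width = 64, .height = 32, .depth = 1};
    const std::string s =
        fmt::format("format={} type={} extent={}",
                    VideoCore::Surface::PixelFormat::R8_UNORM,
                    VideoCommon::ImageType::e2D, extent);
    // s == "format=R8_UNORM type=2D extent={64, 32, 1}"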
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
new file mode 100644
index 000000000..959b3f115
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -0,0 +1,218 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/formatter.h"
13#include "video_core/texture_cache/image_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/util.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::DefaultBlockHeight;
20using VideoCore::Surface::DefaultBlockWidth;
21
22namespace {
23/// Returns the base layer and mip level offset
24[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) {
25 if (layer_stride == 0) {
26 return {0, diff};
27 } else {
28 return {diff / layer_stride, diff % layer_stride};
29 }
30}
31
32[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) {
33 return layers.base_level < info.resources.levels &&
34 layers.base_layer + layers.num_layers <= info.resources.layers;
35}
36
37[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) {
38 const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level);
39 const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level);
40 if (!ValidateLayers(copy.src_subresource, src)) {
41 return false;
42 }
43 if (!ValidateLayers(copy.dst_subresource, dst)) {
44 return false;
45 }
46 if (copy.src_offset.x + copy.extent.width > src_size.width ||
47 copy.src_offset.y + copy.extent.height > src_size.height ||
48 copy.src_offset.z + copy.extent.depth > src_size.depth) {
49 return false;
50 }
51 if (copy.dst_offset.x + copy.extent.width > dst_size.width ||
52 copy.dst_offset.y + copy.extent.height > dst_size.height ||
53 copy.dst_offset.z + copy.extent.depth > dst_size.depth) {
54 return false;
55 }
56 return true;
57}
58} // Anonymous namespace
59
60ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
61 : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
62 unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
63 converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
64 cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
65 mip_level_offsets{CalculateMipLevelOffsets(info)} {
66 if (info.type == ImageType::e3D) {
67 slice_offsets = CalculateSliceOffsets(info);
68 slice_subresources = CalculateSliceSubresources(info);
69 }
70}
71
72std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
73 if (other_addr < gpu_addr) {
74 // Subresource address can't be lower than the base
75 return std::nullopt;
76 }
77 const u32 diff = static_cast<u32>(other_addr - gpu_addr);
78 if (diff > guest_size_bytes) {
79 // This can happen when two CPU addresses are used for different GPU addresses
80 return std::nullopt;
81 }
82 if (info.type != ImageType::e3D) {
83 const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
84 const auto end = mip_level_offsets.begin() + info.resources.levels;
85 const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
86 if (layer > info.resources.layers || it == end) {
87 return std::nullopt;
88 }
89 return SubresourceBase{
90 .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)),
91 .layer = layer,
92 };
93 } else {
94 // TODO: Consider using binary_search after a threshold
95 const auto it = std::ranges::find(slice_offsets, diff);
96 if (it == slice_offsets.cend()) {
97 return std::nullopt;
98 }
99 return slice_subresources[std::distance(slice_offsets.begin(), it)];
100 }
101}
102
103ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept {
104 const auto it = std::ranges::find(image_view_infos, view_info);
105 if (it == image_view_infos.end()) {
106 return ImageViewId{};
107 }
108 return image_view_ids[std::distance(image_view_infos.begin(), it)];
109}
110
111void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) {
112 image_view_infos.push_back(view_info);
113 image_view_ids.push_back(image_view_id);
114}
115
116void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
117 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
118 ASSERT(lhs.info.type == rhs.info.type);
119 std::optional<SubresourceBase> base;
120 if (lhs.info.type == ImageType::Linear) {
121 base = SubresourceBase{.level = 0, .layer = 0};
122 } else {
 123        // We are passing relaxed formats as an option, so whether views are broken won't matter
124 static constexpr bool broken_views = false;
125 base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views);
126 }
127 if (!base) {
128 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
129 return;
130 }
131 const PixelFormat lhs_format = lhs.info.format;
132 const PixelFormat rhs_format = rhs.info.format;
133 const Extent2D lhs_block{
134 .width = DefaultBlockWidth(lhs_format),
135 .height = DefaultBlockHeight(lhs_format),
136 };
137 const Extent2D rhs_block{
138 .width = DefaultBlockWidth(rhs_format),
139 .height = DefaultBlockHeight(rhs_format),
140 };
141 const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
142 const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
143 if (is_lhs_compressed && is_rhs_compressed) {
144 LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
145 return;
146 }
147 const s32 lhs_mips = lhs.info.resources.levels;
148 const s32 rhs_mips = rhs.info.resources.levels;
149 const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
150 AliasedImage lhs_alias;
151 AliasedImage rhs_alias;
152 lhs_alias.id = rhs_id;
153 rhs_alias.id = lhs_id;
154 lhs_alias.copies.reserve(num_mips);
155 rhs_alias.copies.reserve(num_mips);
156 for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) {
157 Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
158 Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
159 if (is_lhs_compressed) {
160 lhs_size.width /= lhs_block.width;
161 lhs_size.height /= lhs_block.height;
162 }
163 if (is_rhs_compressed) {
164 rhs_size.width /= rhs_block.width;
165 rhs_size.height /= rhs_block.height;
166 }
167 const Extent3D copy_size{
168 .width = std::min(lhs_size.width, rhs_size.width),
169 .height = std::min(lhs_size.height, rhs_size.height),
170 .depth = std::min(lhs_size.depth, rhs_size.depth),
171 };
172 if (copy_size.width == 0 || copy_size.height == 0) {
173 LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased.");
174 continue;
175 }
176 const bool is_lhs_3d = lhs.info.type == ImageType::e3D;
177 const bool is_rhs_3d = rhs.info.type == ImageType::e3D;
178 const Offset3D lhs_offset{0, 0, 0};
179 const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0};
180 const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer;
181 const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers;
182 const s32 num_layers = std::min(lhs_layers, rhs_layers);
183 const SubresourceLayers lhs_subresource{
184 .base_level = mip_level,
185 .base_layer = 0,
186 .num_layers = num_layers,
187 };
188 const SubresourceLayers rhs_subresource{
189 .base_level = base->level + mip_level,
190 .base_layer = is_rhs_3d ? 0 : base->layer,
191 .num_layers = num_layers,
192 };
193 [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{
194 .src_subresource = lhs_subresource,
195 .dst_subresource = rhs_subresource,
196 .src_offset = lhs_offset,
197 .dst_offset = rhs_offset,
198 .extent = copy_size,
199 });
200 [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{
201 .src_subresource = rhs_subresource,
202 .dst_subresource = lhs_subresource,
203 .src_offset = rhs_offset,
204 .dst_offset = lhs_offset,
205 .extent = copy_size,
206 });
207 ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy");
208 ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy");
209 }
210 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
211 if (lhs_alias.copies.empty()) {
212 return;
213 }
214 lhs.aliased_images.push_back(std::move(lhs_alias));
215 rhs.aliased_images.push_back(std::move(rhs_alias));
216}
217
218} // namespace VideoCommon
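The non-3D path of TryFindBase is plain address arithmetic: the layer is diff / layer_stride and the remainder must match one of the precomputed mip offsets. A worked example with assumed values:

    // layer_stride = 0x10000, mip_level_offsets = {0x0, 0x8000, 0xC000}
    // other_addr - gpu_addr = 0x18000:
    //   layer      = 0x18000 / 0x10000 = 1
    //   mip_offset = 0x18000 % 0x10000 = 0x8000 -> found at level 1
    // => SubresourceBase{.level = 1, .layer = 1}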
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
new file mode 100644
index 000000000..b7f3b7e43
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.h
@@ -0,0 +1,83 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <vector>
10
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/texture_cache/image_info.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19enum class ImageFlagBits : u32 {
20 AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
21 Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted
22 CpuModified = 1 << 2, ///< Contents have been modified from the CPU
23 GpuModified = 1 << 3, ///< Contents have been modified from the GPU
24 Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
 25    Strong = 1 << 5,            ///< Exists in the image table, the dimensions can be trusted
26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28};
29DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
30
31struct ImageViewInfo;
32
33struct AliasedImage {
34 std::vector<ImageCopy> copies;
35 ImageId id;
36};
37
38struct ImageBase {
39 explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
40
41 [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
42
43 [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
44
45 void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
46
47 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
48 const VAddr overlap_end = overlap_cpu_addr + overlap_size;
49 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
50 }
51
52 ImageInfo info;
53
54 u32 guest_size_bytes = 0;
55 u32 unswizzled_size_bytes = 0;
56 u32 converted_size_bytes = 0;
57 ImageFlagBits flags = ImageFlagBits::CpuModified;
58
59 GPUVAddr gpu_addr = 0;
60 VAddr cpu_addr = 0;
61 VAddr cpu_addr_end = 0;
62
63 u64 modification_tick = 0;
64 u64 frame_tick = 0;
65
66 std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
67
68 std::vector<ImageViewInfo> image_view_infos;
69 std::vector<ImageViewId> image_view_ids;
70
71 std::vector<u32> slice_offsets;
72 std::vector<SubresourceBase> slice_subresources;
73
74 std::vector<AliasedImage> aliased_images;
75};
76
77struct ImageAllocBase {
78 std::vector<ImageId> images;
79};
80
81void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
82
83} // namespace VideoCommon
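DECLARE_ENUM_FLAG_OPERATORS (from common_funcs.h) generates the bitwise operators for the enum class, so the flags compose like plain masks; a brief sketch, assuming that operator set:

    using VideoCommon::ImageFlagBits;

    ImageFlagBits flags = ImageFlagBits::CpuModified;
    flags |= ImageFlagBits::Tracked;
    flags &= ~ImageFlagBits::CpuModified;
    const bool tracked = (flags & ImageFlagBits::Tracked) != ImageFlagBits{};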
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
new file mode 100644
index 000000000..64fd7010a
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -0,0 +1,189 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/surface.h"
7#include "video_core/texture_cache/format_lookup_table.h"
8#include "video_core/texture_cache/image_info.h"
9#include "video_core/texture_cache/samples_helper.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/texture_cache/util.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::TextureType;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20ImageInfo::ImageInfo(const TICEntry& config) noexcept {
21 format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
22 config.a_type, config.srgb_conversion);
23 num_samples = NumSamples(config.msaa_mode);
24 resources.levels = config.max_mip_level + 1;
25 if (config.IsPitchLinear()) {
26 pitch = config.Pitch();
27 } else if (config.IsBlockLinear()) {
28 block = Extent3D{
29 .width = config.block_width,
30 .height = config.block_height,
31 .depth = config.block_depth,
32 };
33 }
34 tile_width_spacing = config.tile_width_spacing;
35 if (config.texture_type != TextureType::Texture2D &&
36 config.texture_type != TextureType::Texture2DNoMipmap) {
37 ASSERT(!config.IsPitchLinear());
38 }
39 switch (config.texture_type) {
40 case TextureType::Texture1D:
41 ASSERT(config.BaseLayer() == 0);
42 type = ImageType::e1D;
43 size.width = config.Width();
44 break;
45 case TextureType::Texture1DArray:
46 UNIMPLEMENTED_IF(config.BaseLayer() != 0);
47 type = ImageType::e1D;
48 size.width = config.Width();
49 resources.layers = config.Depth();
50 break;
51 case TextureType::Texture2D:
52 case TextureType::Texture2DNoMipmap:
53 ASSERT(config.Depth() == 1);
54 type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
55 size.width = config.Width();
56 size.height = config.Height();
57 resources.layers = config.BaseLayer() + 1;
58 break;
59 case TextureType::Texture2DArray:
60 type = ImageType::e2D;
61 size.width = config.Width();
62 size.height = config.Height();
63 resources.layers = config.BaseLayer() + config.Depth();
64 break;
65 case TextureType::TextureCubemap:
66 ASSERT(config.Depth() == 1);
67 type = ImageType::e2D;
68 size.width = config.Width();
69 size.height = config.Height();
70 resources.layers = config.BaseLayer() + 6;
71 break;
72 case TextureType::TextureCubeArray:
73 UNIMPLEMENTED_IF(config.load_store_hint != 0);
74 type = ImageType::e2D;
75 size.width = config.Width();
76 size.height = config.Height();
77 resources.layers = config.BaseLayer() + config.Depth() * 6;
78 break;
79 case TextureType::Texture3D:
80 ASSERT(config.BaseLayer() == 0);
81 type = ImageType::e3D;
82 size.width = config.Width();
83 size.height = config.Height();
84 size.depth = config.Depth();
85 break;
86 case TextureType::Texture1DBuffer:
87 type = ImageType::Buffer;
88 size.width = config.Width();
89 break;
90 default:
91 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
92 break;
93 }
94 if (type != ImageType::Linear) {
95 // FIXME: Call this without passing *this
96 layer_stride = CalculateLayerStride(*this);
97 maybe_unaligned_layer_stride = CalculateLayerSize(*this);
98 }
99}
100
101ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
102 const auto& rt = regs.rt[index];
103 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
104 if (rt.tile_mode.is_pitch_linear) {
105 ASSERT(rt.tile_mode.is_3d == 0);
106 type = ImageType::Linear;
107 pitch = rt.width;
108 size = Extent3D{
109 .width = pitch / BytesPerBlock(format),
110 .height = rt.height,
111 .depth = 1,
112 };
113 return;
114 }
115 size.width = rt.width;
116 size.height = rt.height;
117 layer_stride = rt.layer_stride * 4;
118 maybe_unaligned_layer_stride = layer_stride;
119 num_samples = NumSamples(regs.multisample_mode);
120 block = Extent3D{
121 .width = rt.tile_mode.block_width,
122 .height = rt.tile_mode.block_height,
123 .depth = rt.tile_mode.block_depth,
124 };
125 if (rt.tile_mode.is_3d) {
126 type = ImageType::e3D;
127 size.depth = rt.depth;
128 } else {
129 type = ImageType::e2D;
130 resources.layers = rt.depth;
131 }
132}
133
134ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
135 format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
136 size.width = regs.zeta_width;
137 size.height = regs.zeta_height;
138 resources.levels = 1;
139 layer_stride = regs.zeta.layer_stride * 4;
140 maybe_unaligned_layer_stride = layer_stride;
141 num_samples = NumSamples(regs.multisample_mode);
142 block = Extent3D{
143 .width = regs.zeta.tile_mode.block_width,
144 .height = regs.zeta.tile_mode.block_height,
145 .depth = regs.zeta.tile_mode.block_depth,
146 };
147 if (regs.zeta.tile_mode.is_pitch_linear) {
148 ASSERT(regs.zeta.tile_mode.is_3d == 0);
149 type = ImageType::Linear;
150 pitch = size.width * BytesPerBlock(format);
151 } else if (regs.zeta.tile_mode.is_3d) {
152 ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
153 type = ImageType::e3D;
154 size.depth = regs.zeta_depth;
155 } else {
156 type = ImageType::e2D;
157 resources.layers = regs.zeta_depth;
158 }
159}
160
161ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
162 UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
163 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
164 if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
165 type = ImageType::Linear;
166 size = Extent3D{
167 .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
168 .height = config.height,
169 .depth = 1,
170 };
171 pitch = config.pitch;
172 } else {
173 type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
174 block = Extent3D{
175 .width = config.block_width,
176 .height = config.block_height,
177 .depth = config.block_depth,
178 };
179 // 3D blits with more than once slice are not implemented for now
180 // Render to individual slices
181 size = Extent3D{
182 .width = config.width,
183 .height = config.height,
184 .depth = 1,
185 };
186 }
187}
188
189} // namespace VideoCommon
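The layer counts in the switch above are the subtle part of this constructor: the guest's Depth() field counts array slices for 2D arrays but whole cubes for cube arrays, so the host layer count scales by six in the cube cases. A minimal standalone sketch of just that rule (the helper name and trimmed enum are hypothetical, for illustration only):

#include <cstdint>

enum class TextureType { Texture2DArray, TextureCubemap, TextureCubeArray };

// Hypothetical helper restating the layer-count rules from the switch above.
std::uint32_t HostLayers(TextureType type, std::uint32_t base_layer, std::uint32_t depth) {
    switch (type) {
    case TextureType::Texture2DArray:
        return base_layer + depth;     // one host layer per guest slice
    case TextureType::TextureCubemap:
        return base_layer + 6;         // a single cube always has six faces
    case TextureType::TextureCubeArray:
        return base_layer + depth * 6; // Depth() counts cubes, not faces
    }
    return 1;
}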
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
new file mode 100644
index 000000000..5049fc36e
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.h
@@ -0,0 +1,38 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/surface.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14using Tegra::Texture::TICEntry;
15using VideoCore::Surface::PixelFormat;
16
17struct ImageInfo {
18 explicit ImageInfo() = default;
19 explicit ImageInfo(const TICEntry& config) noexcept;
20 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
21 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
22 explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;
23
24 PixelFormat format = PixelFormat::Invalid;
25 ImageType type = ImageType::e1D;
26 SubresourceExtent resources;
27 Extent3D size{1, 1, 1};
28 union {
29 Extent3D block{0, 0, 0};
30 u32 pitch;
31 };
32 u32 layer_stride = 0;
33 u32 maybe_unaligned_layer_stride = 0;
34 u32 num_samples = 1;
35 u32 tile_width_spacing = 0;
36};
37
38} // namespace VideoCommon
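Note the anonymous union in the struct above: block and pitch occupy the same storage, and which member is valid depends on type — pitch for ImageType::Linear, the block extents for block-linear images. A minimal sketch of how a consumer might respect that invariant, assuming the header above is included (LinearPitch is a hypothetical helper, not part of this header):

#include <optional>

// Hypothetical accessor: pitch is only the active union member for linear
// images; for block-linear images the same bytes hold the block extents.
std::optional<u32> LinearPitch(const VideoCommon::ImageInfo& info) {
    if (info.type == VideoCommon::ImageType::Linear) {
        return info.pitch;
    }
    return std::nullopt;
}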
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
new file mode 100644
index 000000000..18f72e508
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/assert.h"
8#include "core/settings.h"
9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/formatter.h"
12#include "video_core/texture_cache/image_info.h"
13#include "video_core/texture_cache/image_view_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
20 ImageId image_id_)
21 : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
22 size{
23 .width = std::max(image_info.size.width >> range.base.level, 1u),
24 .height = std::max(image_info.size.height >> range.base.level, 1u),
25 .depth = std::max(image_info.size.depth >> range.base.level, 1u),
26 } {
27 ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false),
28 "Image view format {} is incompatible with image format {}", info.format,
29 image_info.format);
30 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
31 if (image_info.type == ImageType::Linear && is_async) {
32        flags |= ImageViewFlagBits::PreemptiveDownload;
33 }
34 if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) {
35 flags |= ImageViewFlagBits::Slice;
36 }
37}
38
39ImageViewBase::ImageViewBase(const NullImageParams&) {}
40
41} // namespace VideoCommon
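The size initializer above applies the usual mip-chain rule: each level halves every axis with an integer shift, clamped at one so an extent never reaches zero. A standalone restatement with a few compile-time checks (a sketch, not part of the file):

#include <algorithm>
#include <cstdint>

using u32 = std::uint32_t;

// Mip extent rule used in ImageViewBase's size initializer above.
constexpr u32 MipExtent(u32 base, u32 level) {
    return std::max(base >> level, 1u);
}
static_assert(MipExtent(35, 2) == 8); // 35 >> 2 == 8 (integer shift rounds down)
static_assert(MipExtent(16, 2) == 4);
static_assert(MipExtent(1, 5) == 1);  // clamped: a level is never 0 pixels wide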
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
new file mode 100644
index 000000000..73954167e
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -0,0 +1,47 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "video_core/surface.h"
9#include "video_core/texture_cache/types.h"
10
11namespace VideoCommon {
12
13using VideoCore::Surface::PixelFormat;
14
15struct ImageViewInfo;
16struct ImageInfo;
17
18struct NullImageParams {};
19
20enum class ImageViewFlagBits : u16 {
21    PreemptiveDownload = 1 << 0,
22 Strong = 1 << 1,
23 Slice = 1 << 2,
24};
25DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
26
27struct ImageViewBase {
28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
29 ImageId image_id);
30 explicit ImageViewBase(const NullImageParams&);
31
32 [[nodiscard]] bool IsBuffer() const noexcept {
33 return type == ImageViewType::Buffer;
34 }
35
36 ImageId image_id{};
37 PixelFormat format{};
38 ImageViewType type{};
39 SubresourceRange range;
40 Extent3D size{0, 0, 0};
41 ImageViewFlagBits flags{};
42
43 u64 invalidation_tick = 0;
44 u64 modification_tick = 0;
45};
46
47} // namespace VideoCommon
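DECLARE_ENUM_FLAG_OPERATORS generates the bitwise operators for ImageViewFlagBits, so callers combine bits with |= and test them by masking. A minimal usage sketch; the explicit comparison against an empty value is needed because scoped enums do not convert to bool:

using VideoCommon::ImageViewFlagBits;

ImageViewFlagBits flags{};
flags |= ImageViewFlagBits::Slice;  // set a flag through the generated |=
flags |= ImageViewFlagBits::Strong;

// Test a flag: a nonzero masked value means the bit is set.
const bool is_slice = (flags & ImageViewFlagBits::Slice) != ImageViewFlagBits{};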
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
new file mode 100644
index 000000000..faf5b151f
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -0,0 +1,88 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/assert.h"
8#include "video_core/texture_cache/image_view_info.h"
9#include "video_core/texture_cache/texture_cache.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/textures/texture.h"
12
13namespace VideoCommon {
14
15namespace {
16
17constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
18
19[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
20 const u8 casted = static_cast<u8>(source);
21 ASSERT(static_cast<SwizzleSource>(casted) == source);
22 return casted;
23}
24
25} // Anonymous namespace
26
27ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
28 : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)},
29 y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)},
30 w_source{CastSwizzle(config.w_source)} {
31 range.base = SubresourceBase{
32 .level = static_cast<s32>(config.res_min_mip_level),
33 .layer = base_layer,
34 };
35 range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1;
36
37 switch (config.texture_type) {
38 case TextureType::Texture1D:
39 ASSERT(config.Height() == 1);
40 ASSERT(config.Depth() == 1);
41 type = ImageViewType::e1D;
42 break;
43 case TextureType::Texture2D:
44 case TextureType::Texture2DNoMipmap:
45 ASSERT(config.Depth() == 1);
46 type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect;
47 break;
48 case TextureType::Texture3D:
49 type = ImageViewType::e3D;
50 break;
51 case TextureType::TextureCubemap:
52 ASSERT(config.Depth() == 1);
53 type = ImageViewType::Cube;
54 range.extent.layers = 6;
55 break;
56 case TextureType::Texture1DArray:
57 type = ImageViewType::e1DArray;
58 range.extent.layers = config.Depth();
59 break;
60 case TextureType::Texture2DArray:
61 type = ImageViewType::e2DArray;
62 range.extent.layers = config.Depth();
63 break;
64 case TextureType::Texture1DBuffer:
65 type = ImageViewType::Buffer;
66 break;
67 case TextureType::TextureCubeArray:
68 type = ImageViewType::CubeArray;
69 range.extent.layers = config.Depth() * 6;
70 break;
71 default:
72 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
73 break;
74 }
75}
76
77ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_,
78 SubresourceRange range_) noexcept
79 : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE},
80 y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE},
81 w_source{RENDER_TARGET_SWIZZLE} {}
82
83bool ImageViewInfo::IsRenderTarget() const noexcept {
84 return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE &&
85 z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE;
86}
87
88} // namespace VideoCommon
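Two details above are worth spelling out: the level range is inclusive, so levels = max - min + 1, and IsRenderTarget works because RENDER_TARGET_SWIZZLE (0xFF) can never be produced by CastSwizzle from a real SwizzleSource. A quick worked check of the first rule (illustrative values):

// A TIC with res_min_mip_level = 2 and res_max_mip_level = 5 views the
// inclusive range {2, 3, 4, 5}: four levels, not three.
constexpr int min_level = 2;
constexpr int max_level = 5;
static_assert(max_level - min_level + 1 == 4);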
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
new file mode 100644
index 000000000..0c1f99117
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9
10#include "video_core/surface.h"
11#include "video_core/texture_cache/types.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::SwizzleSource;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20/// Properties used to determine an image view
21struct ImageViewInfo {
22 explicit ImageViewInfo() noexcept = default;
23 explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
24 explicit ImageViewInfo(ImageViewType type, PixelFormat format,
25 SubresourceRange range = {}) noexcept;
26
27 auto operator<=>(const ImageViewInfo&) const noexcept = default;
28
29 [[nodiscard]] bool IsRenderTarget() const noexcept;
30
31 [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
32 return std::array{
33 static_cast<SwizzleSource>(x_source),
34 static_cast<SwizzleSource>(y_source),
35 static_cast<SwizzleSource>(z_source),
36 static_cast<SwizzleSource>(w_source),
37 };
38 }
39
40 ImageViewType type{};
41 PixelFormat format{};
42 SubresourceRange range;
43 u8 x_source = static_cast<u8>(SwizzleSource::R);
44 u8 y_source = static_cast<u8>(SwizzleSource::G);
45 u8 z_source = static_cast<u8>(SwizzleSource::B);
46 u8 w_source = static_cast<u8>(SwizzleSource::A);
47};
48static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
49
50} // namespace VideoCommon
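The closing static_assert is what licenses treating an ImageViewInfo as raw bytes: with no padding bits, bytewise identity matches member-wise equality, so the struct can be hashed or compared as a plain byte sequence. A sketch of the kind of use the assertion protects (the byte hash here is illustrative, not the cache's actual hashing):

#include <cstddef>

// Safe only because ImageViewInfo has unique object representations:
// every bit of the object participates in its value.
std::size_t HashViewInfo(const VideoCommon::ImageViewInfo& info) {
    std::size_t hash = 0;
    const auto* bytes = reinterpret_cast<const unsigned char*>(&info);
    for (std::size_t i = 0; i < sizeof(info); ++i) {
        hash = hash * 31 + bytes[i]; // simple illustrative byte hash
    }
    return hash;
}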
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <span>
9#include <utility>
10
11#include "common/bit_cast.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16/// Framebuffer properties used to lookup a framebuffer
17struct RenderTargets {
18 constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
19
20 constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
21 const auto contains = [elements](ImageViewId item) {
22 return std::ranges::find(elements, item) != elements.end();
23 };
24 return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
25 }
26
27 std::array<ImageViewId, NUM_RT> color_buffer_ids;
28 ImageViewId depth_buffer_id;
29 std::array<u8, NUM_RT> draw_buffers{};
30 Extent2D size;
31};
32
33} // namespace VideoCommon
34
35namespace std {
36
37template <>
38struct hash<VideoCommon::RenderTargets> {
39 size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept {
40 using VideoCommon::ImageViewId;
41 size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id);
42 for (const ImageViewId color_buffer_id : rt.color_buffer_ids) {
43 value ^= std::hash<ImageViewId>{}(color_buffer_id);
44 }
45 value ^= Common::BitCast<u64>(rt.draw_buffers);
46 value ^= Common::BitCast<u64>(rt.size);
47 return value;
48 }
49};
50
51} // namespace std
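The std::hash specialization, together with the operator== implied by the defaulted operator<=>, lets RenderTargets key a hash map directly — the natural shape for a per-configuration framebuffer cache. Note the BitCast lines assume draw_buffers and size each pack into exactly eight bytes. A minimal usage sketch (Framebuffer is a stand-in type, not part of this header):

#include <unordered_map>

struct Framebuffer {}; // hypothetical backend object

std::unordered_map<VideoCommon::RenderTargets, Framebuffer> framebuffers;

// Hash and equality both come from the header above, so lookup just works.
Framebuffer& GetFramebuffer(const VideoCommon::RenderTargets& key) {
    return framebuffers[key]; // default-constructs on first use
}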
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
new file mode 100644
index 000000000..04539a43c
--- /dev/null
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -0,0 +1,55 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/assert.h"
10#include "video_core/textures/texture.h"
11
12namespace VideoCommon {
13
14[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
15 switch (num_samples) {
16 case 1:
17 return {0, 0};
18 case 2:
19 return {1, 0};
20 case 4:
21 return {1, 1};
22 case 8:
23 return {2, 1};
24 case 16:
25 return {2, 2};
26 }
27 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
28 return {1, 1};
29}
30
31[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
32 using Tegra::Texture::MsaaMode;
33 switch (msaa_mode) {
34 case MsaaMode::Msaa1x1:
35 return 1;
36 case MsaaMode::Msaa2x1:
37 case MsaaMode::Msaa2x1_D3D:
38 return 2;
39 case MsaaMode::Msaa2x2:
40 case MsaaMode::Msaa2x2_VC4:
41 case MsaaMode::Msaa2x2_VC12:
42 return 4;
43 case MsaaMode::Msaa4x2:
44 case MsaaMode::Msaa4x2_D3D:
45 case MsaaMode::Msaa4x2_VC8:
46 case MsaaMode::Msaa4x2_VC24:
47 return 8;
48 case MsaaMode::Msaa4x4:
49 return 16;
50 }
51 UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
52 return 1;
53}
54
55} // namespace VideoCommon
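SamplesLog2 returns per-axis log2 factors that match the X-by-Y naming of the MSAA modes: 8 samples is a 4x2 pattern, hence {2, 1}. A quick sanity check of the round trip, as a sketch using the header's own functions:

#include "video_core/texture_cache/samples_helper.h"

void CheckSamples() {
    // Msaa4x2 -> 8 samples -> log2 factors {2, 1}; (1 << 2) * (1 << 1) == 8.
    const auto [x_log2, y_log2] = VideoCommon::SamplesLog2(8);
    ASSERT(x_log2 == 2 && y_log2 == 1);
    ASSERT(VideoCommon::NumSamples(Tegra::Texture::MsaaMode::Msaa4x2) == 8);
}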
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <concepts>
9#include <numeric>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16
17namespace VideoCommon {
18
19struct SlotId {
20 static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
21
22 constexpr auto operator<=>(const SlotId&) const noexcept = default;
23
24 constexpr explicit operator bool() const noexcept {
25 return index != INVALID_INDEX;
26 }
27
28 u32 index = INVALID_INDEX;
29};
30
31template <class T>
32requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
33class SlotVector {
34public:
35 ~SlotVector() noexcept {
36 size_t index = 0;
37 for (u64 bits : stored_bitset) {
38 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
39 if ((bits & 1) != 0) {
40 values[index + bit].object.~T();
41 }
42 }
43 index += 64;
44 }
45 delete[] values;
46 }
47
48 [[nodiscard]] T& operator[](SlotId id) noexcept {
49 ValidateIndex(id);
50 return values[id.index].object;
51 }
52
53 [[nodiscard]] const T& operator[](SlotId id) const noexcept {
54 ValidateIndex(id);
55 return values[id.index].object;
56 }
57
58 template <typename... Args>
59 [[nodiscard]] SlotId insert(Args&&... args) noexcept {
60 const u32 index = FreeValueIndex();
61 new (&values[index].object) T(std::forward<Args>(args)...);
62 SetStorageBit(index);
63
64 return SlotId{index};
65 }
66
67 void erase(SlotId id) noexcept {
68 values[id.index].object.~T();
69 free_list.push_back(id.index);
70 ResetStorageBit(id.index);
71 }
72
73private:
74 struct NonTrivialDummy {
75 NonTrivialDummy() noexcept {}
76 };
77
78 union Entry {
79 Entry() noexcept : dummy{} {}
80 ~Entry() noexcept {}
81
82 NonTrivialDummy dummy;
83 T object;
84 };
85
86 void SetStorageBit(u32 index) noexcept {
87 stored_bitset[index / 64] |= u64(1) << (index % 64);
88 }
89
90 void ResetStorageBit(u32 index) noexcept {
91 stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
92 }
93
94 bool ReadStorageBit(u32 index) noexcept {
95 return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
96 }
97
98 void ValidateIndex(SlotId id) const noexcept {
99 DEBUG_ASSERT(id);
100 DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
101 DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
102 }
103
104 [[nodiscard]] u32 FreeValueIndex() noexcept {
105 if (free_list.empty()) {
106 Reserve(values_capacity ? (values_capacity << 1) : 1);
107 }
108 const u32 free_index = free_list.back();
109 free_list.pop_back();
110 return free_index;
111 }
112
113 void Reserve(size_t new_capacity) noexcept {
114 Entry* const new_values = new Entry[new_capacity];
115 size_t index = 0;
116 for (u64 bits : stored_bitset) {
117 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
118 const size_t i = index + bit;
119 if ((bits & 1) == 0) {
120 continue;
121 }
122 T& old_value = values[i].object;
123 new (&new_values[i].object) T(std::move(old_value));
124 old_value.~T();
125 }
126 index += 64;
127 }
128
129 stored_bitset.resize((new_capacity + 63) / 64);
130
131 const size_t old_free_size = free_list.size();
132 free_list.resize(old_free_size + (new_capacity - values_capacity));
133 std::iota(free_list.begin() + old_free_size, free_list.end(),
134 static_cast<u32>(values_capacity));
135
136 delete[] values;
137 values = new_values;
138 values_capacity = new_capacity;
139 }
140
141 Entry* values = nullptr;
142 size_t values_capacity = 0;
143 size_t values_size = 0;
144
145 std::vector<u64> stored_bitset;
146 std::vector<u32> free_list;
147};
148
149} // namespace VideoCommon
150
151template <>
152struct std::hash<VideoCommon::SlotId> {
153 size_t operator()(const VideoCommon::SlotId& id) const noexcept {
154 return std::hash<u32>{}(id.index);
155 }
156};
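SlotVector hands out stable 32-bit SlotIds instead of pointers: elements live in a reallocating array, the bitset tracks which slots hold constructed objects, and erased indices return to the free list for reuse. A minimal usage sketch (Image is a stand-in element type; any nothrow-movable type works):

struct Image {
    u32 width = 0;
    u32 height = 0;
};

void Example() {
    VideoCommon::SlotVector<Image> images;

    // insert() constructs the element in a free slot and returns its id...
    const VideoCommon::SlotId id = images.insert(Image{.width = 64, .height = 64});

    // ...which stays valid across further insertions, unlike a raw pointer.
    images[id].height = 128;

    images.erase(id); // the index goes back on the free list for reuse
}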
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
deleted file mode 100644
index efbcf6723..000000000
--- a/src/video_core/texture_cache/surface_base.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/algorithm.h"
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/microprofile.h"
9#include "video_core/memory_manager.h"
10#include "video_core/texture_cache/surface_base.h"
11#include "video_core/texture_cache/surface_params.h"
12#include "video_core/textures/convert.h"
13
14namespace VideoCommon {
15
16MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
17MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
18
19using Tegra::Texture::ConvertFromGuestToHost;
20using VideoCore::MortonSwizzleMode;
21using VideoCore::Surface::IsPixelFormatASTC;
22using VideoCore::Surface::PixelFormat;
23
24StagingCache::StagingCache() = default;
25
26StagingCache::~StagingCache() = default;
27
28SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
29 bool is_astc_supported_)
30 : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels),
31 mipmap_offsets(params.num_levels) {
32 is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_;
33 host_memory_size = params.GetHostSizeInBytes(is_converted);
34
35 std::size_t offset = 0;
36 for (u32 level = 0; level < params.num_levels; ++level) {
37 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
38 mipmap_sizes[level] = mipmap_size;
39 mipmap_offsets[level] = offset;
40 offset += mipmap_size;
41 }
42 layer_size = offset;
43 if (params.is_layered) {
44 if (params.is_tiled) {
45 layer_size =
46 SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
47 }
48 guest_memory_size = layer_size * params.depth;
49 } else {
50 guest_memory_size = layer_size;
51 }
52}
53
54MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
55 const u32 src_bpp{params.GetBytesPerPixel()};
56 const u32 dst_bpp{rhs.GetBytesPerPixel()};
57 const bool ib1 = params.IsBuffer();
58 const bool ib2 = rhs.IsBuffer();
59 if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
60 const bool cb1 = params.IsCompressed();
61 const bool cb2 = rhs.IsCompressed();
62 if (cb1 == cb2) {
63 return MatchTopologyResult::FullMatch;
64 }
65 return MatchTopologyResult::CompressUnmatch;
66 }
67 return MatchTopologyResult::None;
68}
69
70MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
71    // Buffer surface check
72 if (params.IsBuffer()) {
73 const std::size_t wd1 = params.width * params.GetBytesPerPixel();
74 const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
75 if (wd1 == wd2) {
76 return MatchStructureResult::FullMatch;
77 }
78 return MatchStructureResult::None;
79 }
80
81    // Linear surface check
82 if (!params.is_tiled) {
83 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
84 if (params.width == rhs.width) {
85 return MatchStructureResult::FullMatch;
86 } else {
87 return MatchStructureResult::SemiMatch;
88 }
89 }
90 return MatchStructureResult::None;
91 }
92
93    // Tiled surface check
94 if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
95 params.tile_width_spacing, params.num_levels) ==
96 std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
97 rhs.tile_width_spacing, rhs.num_levels)) {
98 if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
99 return MatchStructureResult::FullMatch;
100 }
101 const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
102 rhs.pixel_format);
103 const u32 hs =
104 SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
105 const u32 w1 = params.GetBlockAlignedWidth();
106 if (std::tie(w1, params.height) == std::tie(ws, hs)) {
107 return MatchStructureResult::SemiMatch;
108 }
109 }
110 return MatchStructureResult::None;
111}
112
113std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
114 const GPUVAddr candidate_gpu_addr) const {
115 if (gpu_addr == candidate_gpu_addr) {
116 return {{0, 0}};
117 }
118
119 if (candidate_gpu_addr < gpu_addr) {
120 return std::nullopt;
121 }
122
123 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
124 const auto layer{static_cast<u32>(relative_address / layer_size)};
125 if (layer >= params.depth) {
126 return std::nullopt;
127 }
128
129 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
130 const auto mipmap_it =
131 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
132 if (mipmap_it == mipmap_offsets.end()) {
133 return std::nullopt;
134 }
135
136 const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
137 return std::make_pair(layer, level);
138}
139
140std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
141 const u32 layers{params.depth};
142 const u32 mipmaps{params.num_levels};
143 std::vector<CopyParams> result;
144 result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
145
146 for (u32 layer = 0; layer < layers; layer++) {
147 for (u32 level = 0; level < mipmaps; level++) {
148 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
149 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
150 result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
151 }
152 }
153 return result;
154}
155
156std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
157 const u32 mipmaps{params.num_levels};
158 std::vector<CopyParams> result;
159 result.reserve(mipmaps);
160
161 for (u32 level = 0; level < mipmaps; level++) {
162 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
163 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
164 const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
165 result.emplace_back(width, height, depth, level);
166 }
167 return result;
168}
169
170void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory,
171 const SurfaceParams& surface_params, u8* buffer, u32 level) {
172 const u32 width{surface_params.GetMipWidth(level)};
173 const u32 height{surface_params.GetMipHeight(level)};
174 const u32 block_height{surface_params.GetMipBlockHeight(level)};
175 const u32 block_depth{surface_params.GetMipBlockDepth(level)};
176
177 std::size_t guest_offset{mipmap_offsets[level]};
178 if (surface_params.is_layered) {
179 std::size_t host_offset = 0;
180 const std::size_t guest_stride = layer_size;
181 const std::size_t host_stride = surface_params.GetHostLayerSize(level);
182 for (u32 layer = 0; layer < surface_params.depth; ++layer) {
183 MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height,
184 block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset,
185 memory + guest_offset);
186 guest_offset += guest_stride;
187 host_offset += host_stride;
188 }
189 } else {
190 MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth,
191 surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer,
192 memory + guest_offset);
193 }
194}
195
196void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
197 StagingCache& staging_cache) {
198 MICROPROFILE_SCOPE(GPU_Load_Texture);
199 auto& staging_buffer = staging_cache.GetBuffer(0);
200 u8* host_ptr;
201    // Use an extra temporary buffer
202 auto& tmp_buffer = staging_cache.GetBuffer(1);
203 tmp_buffer.resize(guest_memory_size);
204 host_ptr = tmp_buffer.data();
205 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
206
207 if (params.is_tiled) {
208 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
209 params.block_width, static_cast<u32>(params.target));
210 for (u32 level = 0; level < params.num_levels; ++level) {
211 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
212 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
213 staging_buffer.data() + host_offset, level);
214 }
215 } else {
216 ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
217 const u32 bpp{params.GetBytesPerPixel()};
218 const u32 block_width{params.GetDefaultBlockWidth()};
219 const u32 block_height{params.GetDefaultBlockHeight()};
220 const u32 width{(params.width + block_width - 1) / block_width};
221 const u32 height{(params.height + block_height - 1) / block_height};
222 const u32 copy_size{width * bpp};
223 if (params.pitch == copy_size) {
224 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
225 } else {
226 const u8* start{host_ptr};
227 u8* write_to{staging_buffer.data()};
228 for (u32 h = height; h > 0; --h) {
229 std::memcpy(write_to, start, copy_size);
230 start += params.pitch;
231 write_to += copy_size;
232 }
233 }
234 }
235
236 if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
237 return;
238 }
239
240 for (u32 level = params.num_levels; level--;) {
241 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
242 const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
243 u8* const in_buffer = staging_buffer.data() + in_host_offset;
244 u8* const out_buffer = staging_buffer.data() + out_host_offset;
245 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
246 params.GetMipWidth(level), params.GetMipHeight(level),
247 params.GetMipDepth(level), true, true);
248 }
249}
250
251void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
252 StagingCache& staging_cache) {
253 MICROPROFILE_SCOPE(GPU_Flush_Texture);
254 auto& staging_buffer = staging_cache.GetBuffer(0);
255 u8* host_ptr;
256
257    // Use an extra temporary buffer
258 auto& tmp_buffer = staging_cache.GetBuffer(1);
259 tmp_buffer.resize(guest_memory_size);
260 host_ptr = tmp_buffer.data();
261
262 if (params.target == SurfaceTarget::Texture3D) {
263 // Special case for 3D texture segments
264 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
265 }
266
267 if (params.is_tiled) {
268 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
269 for (u32 level = 0; level < params.num_levels; ++level) {
270 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
271 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
272 staging_buffer.data() + host_offset, level);
273 }
274 } else if (params.IsBuffer()) {
275 // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
276 // memory.
277 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
278 } else {
279 ASSERT(params.target == SurfaceTarget::Texture2D);
280 ASSERT(params.num_levels == 1);
281
282 const u32 bpp{params.GetBytesPerPixel()};
283 const u32 copy_size{params.width * bpp};
284 if (params.pitch == copy_size) {
285 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
286 } else {
287 u8* start{host_ptr};
288 const u8* read_to{staging_buffer.data()};
289 for (u32 h = params.height; h > 0; --h) {
290 std::memcpy(start, read_to, copy_size);
291 start += params.pitch;
292 read_to += copy_size;
293 }
294 }
295 }
296 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
297}
298
299} // namespace VideoCommon
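GetLayerMipmap above inverts the layout computed in the constructor: dividing the relative address by layer_size yields the layer, and the remainder is binary-searched in mipmap_offsets to yield the level. A worked example with hypothetical numbers:

// Hypothetical surface: layer_size = 0x1800, mipmap_offsets = {0x0, 0x1000, 0x1400}.
// For a candidate address 0x2800 past gpu_addr:
//   layer          = 0x2800 / 0x1800     -> 1
//   mipmap_address = 0x2800 - 1 * 0x1800 -> 0x1000, found at index 1
// so the candidate resolves to (layer 1, level 1). An address 0x2400 past the
// base leaves a remainder of 0xC00, which matches no offset, so the lookup
// returns std::nullopt.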
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
deleted file mode 100644
index b57135fe4..000000000
--- a/src/video_core/texture_cache/surface_base.h
+++ /dev/null
@@ -1,333 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <tuple>
9#include <unordered_map>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/gpu.h"
14#include "video_core/morton.h"
15#include "video_core/texture_cache/copy_params.h"
16#include "video_core/texture_cache/surface_params.h"
17#include "video_core/texture_cache/surface_view.h"
18
19namespace Tegra {
20class MemoryManager;
21}
22
23namespace VideoCommon {
24
25using VideoCore::MortonSwizzleMode;
26using VideoCore::Surface::SurfaceTarget;
27
28enum class MatchStructureResult : u32 {
29 FullMatch = 0,
30 SemiMatch = 1,
31 None = 2,
32};
33
34enum class MatchTopologyResult : u32 {
35 FullMatch = 0,
36 CompressUnmatch = 1,
37 None = 2,
38};
39
40class StagingCache {
41public:
42 explicit StagingCache();
43 ~StagingCache();
44
45 std::vector<u8>& GetBuffer(std::size_t index) {
46 return staging_buffer[index];
47 }
48
49 const std::vector<u8>& GetBuffer(std::size_t index) const {
50 return staging_buffer[index];
51 }
52
53 void SetSize(std::size_t size) {
54 staging_buffer.resize(size);
55 }
56
57private:
58 std::vector<std::vector<u8>> staging_buffer;
59};
60
61class SurfaceBaseImpl {
62public:
63 void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
64
65 void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
66
67 GPUVAddr GetGpuAddr() const {
68 return gpu_addr;
69 }
70
71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cpu_addr < end) && (cpu_addr_end > start);
73 }
74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
76 const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
77 return gpu_addr <= other_start && other_end <= gpu_addr_end;
78 }
79
80 // Use only when recycling a surface
81 void SetGpuAddr(const GPUVAddr new_addr) {
82 gpu_addr = new_addr;
83 }
84
85 VAddr GetCpuAddr() const {
86 return cpu_addr;
87 }
88
89 VAddr GetCpuAddrEnd() const {
90 return cpu_addr_end;
91 }
92
93 void SetCpuAddr(const VAddr new_addr) {
94 cpu_addr = new_addr;
95 cpu_addr_end = new_addr + guest_memory_size;
96 }
97
98 const SurfaceParams& GetSurfaceParams() const {
99 return params;
100 }
101
102 std::size_t GetSizeInBytes() const {
103 return guest_memory_size;
104 }
105
106 std::size_t GetHostSizeInBytes() const {
107 return host_memory_size;
108 }
109
110 std::size_t GetMipmapSize(const u32 level) const {
111 return mipmap_sizes[level];
112 }
113
114 bool IsLinear() const {
115 return !params.is_tiled;
116 }
117
118 bool IsConverted() const {
119 return is_converted;
120 }
121
122 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
123 return params.pixel_format == pixel_format;
124 }
125
126 VideoCore::Surface::PixelFormat GetFormat() const {
127 return params.pixel_format;
128 }
129
130 bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
131 return params.target == target;
132 }
133
134 MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
135
136 MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
137
138 bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
139 return std::tie(gpu_addr, params.target, params.num_levels) ==
140 std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
141 params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
142 }
143
144 std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
145
146 std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
147 return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
148 }
149
150protected:
151 explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
152 bool is_astc_supported_);
153 ~SurfaceBaseImpl() = default;
154
155 virtual void DecorateSurfaceName() = 0;
156
157 const SurfaceParams params;
158 std::size_t layer_size;
159 std::size_t guest_memory_size;
160 std::size_t host_memory_size;
161 GPUVAddr gpu_addr{};
162 VAddr cpu_addr{};
163 VAddr cpu_addr_end{};
164 bool is_converted{};
165
166 std::vector<std::size_t> mipmap_sizes;
167 std::vector<std::size_t> mipmap_offsets;
168
169private:
170 void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params,
171 u8* buffer, u32 level);
172
173 std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
174
175 std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
176};
177
178template <typename TView>
179class SurfaceBase : public SurfaceBaseImpl {
180public:
181 virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
182
183 virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
184
185 void MarkAsModified(bool is_modified_, u64 tick) {
186 is_modified = is_modified_ || is_target;
187 modification_tick = tick;
188 }
189
190 void MarkAsRenderTarget(bool is_target_, u32 index_) {
191 is_target = is_target_;
192 index = index_;
193 }
194
195 void SetMemoryMarked(bool is_memory_marked_) {
196 is_memory_marked = is_memory_marked_;
197 }
198
199 bool IsMemoryMarked() const {
200 return is_memory_marked;
201 }
202
203 void SetSyncPending(bool is_sync_pending_) {
204 is_sync_pending = is_sync_pending_;
205 }
206
207 bool IsSyncPending() const {
208 return is_sync_pending;
209 }
210
211 void MarkAsPicked(bool is_picked_) {
212 is_picked = is_picked_;
213 }
214
215 bool IsModified() const {
216 return is_modified;
217 }
218
219 bool IsProtected() const {
220 // Only 3D slices are to be protected
221 return is_target && params.target == SurfaceTarget::Texture3D;
222 }
223
224 bool IsRenderTarget() const {
225 return is_target;
226 }
227
228 u32 GetRenderTarget() const {
229 return index;
230 }
231
232 bool IsRegistered() const {
233 return is_registered;
234 }
235
236 bool IsPicked() const {
237 return is_picked;
238 }
239
240 void MarkAsRegistered(bool is_reg) {
241 is_registered = is_reg;
242 }
243
244 u64 GetModificationTick() const {
245 return modification_tick;
246 }
247
248 TView EmplaceOverview(const SurfaceParams& overview_params) {
249 const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
251 }
252
253 TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
254 return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
255 base_level, num_levels));
256 }
257
258 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
259 const GPUVAddr view_addr,
260 const std::size_t candidate_size, const u32 mipmap,
261 const u32 layer) {
262 const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
263 if (!layer_mipmap) {
264 return {};
265 }
266 const auto [end_layer, end_mipmap] = *layer_mipmap;
267 if (layer != end_layer) {
268 if (mipmap == 0 && end_mipmap == 0) {
269 return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
270 }
271 return {};
272 } else {
273 return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
274 }
275 }
276
277 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
278 const std::size_t candidate_size) {
279 if (params.target == SurfaceTarget::Texture3D ||
280 view_params.target == SurfaceTarget::Texture3D ||
281 (params.num_levels == 1 && !params.is_layered)) {
282 return {};
283 }
284 const auto layer_mipmap{GetLayerMipmap(view_addr)};
285 if (!layer_mipmap) {
286 return {};
287 }
288 const auto [layer, mipmap] = *layer_mipmap;
289 if (GetMipmapSize(mipmap) != candidate_size) {
290 return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
291 }
292 return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
293 }
294
295 TView GetMainView() const {
296 return main_view;
297 }
298
299protected:
300 explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_,
301 bool is_astc_supported_)
302 : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {}
303
304 ~SurfaceBase() = default;
305
306 virtual TView CreateView(const ViewParams& view_key) = 0;
307
308 TView main_view;
309 std::unordered_map<ViewParams, TView> views;
310
311private:
312 TView GetView(const ViewParams& key) {
313 const auto [entry, is_cache_miss] = views.try_emplace(key);
314 auto& view{entry->second};
315 if (is_cache_miss) {
316 view = CreateView(key);
317 }
318 return view;
319 }
320
321 static constexpr u32 NO_RT = 0xFFFFFFFF;
322
323 bool is_modified{};
324 bool is_target{};
325 bool is_registered{};
326 bool is_picked{};
327 bool is_memory_marked{};
328 bool is_sync_pending{};
329 u32 index{NO_RT};
330 u64 modification_tick{};
331};
332
333} // namespace VideoCommon
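GetView at the bottom of the class is a small memoization: try_emplace either finds the cached view for a ViewParams key or default-constructs a slot that CreateView then fills. The same pattern in isolation (a generic sketch, not the class's actual helper; requires a hashable Key and default-constructible Value):

#include <unordered_map>

// Compute-on-miss keyed lookup, as used by SurfaceBase::GetView above.
template <typename Key, typename Value, typename Factory>
Value& GetOrCreate(std::unordered_map<Key, Value>& cache, const Key& key, Factory&& create) {
    const auto [it, inserted] = cache.try_emplace(key);
    if (inserted) {
        it->second = create(key); // cache miss: build and store the value
    }
    return it->second;
}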
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
deleted file mode 100644
index 96f93246d..000000000
--- a/src/video_core/texture_cache/surface_params.cpp
+++ /dev/null
@@ -1,445 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "core/core.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/format_lookup_table.h"
15#include "video_core/texture_cache/surface_params.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::PixelFormat;
20using VideoCore::Surface::PixelFormatFromDepthFormat;
21using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
22using VideoCore::Surface::SurfaceTarget;
23using VideoCore::Surface::SurfaceTargetFromTextureType;
24using VideoCore::Surface::SurfaceType;
25
26namespace {
27
28SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
29 switch (type) {
30 case Tegra::Shader::TextureType::Texture1D:
31 return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
32 case Tegra::Shader::TextureType::Texture2D:
33 return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
34 case Tegra::Shader::TextureType::Texture3D:
35 ASSERT(!is_array);
36 return SurfaceTarget::Texture3D;
37 case Tegra::Shader::TextureType::TextureCube:
38 return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
39 default:
40 UNREACHABLE();
41 return SurfaceTarget::Texture2D;
42 }
43}
44
45SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
46 switch (type) {
47 case Tegra::Shader::ImageType::Texture1D:
48 return SurfaceTarget::Texture1D;
49 case Tegra::Shader::ImageType::TextureBuffer:
50 return SurfaceTarget::TextureBuffer;
51 case Tegra::Shader::ImageType::Texture1DArray:
52 return SurfaceTarget::Texture1DArray;
53 case Tegra::Shader::ImageType::Texture2D:
54 return SurfaceTarget::Texture2D;
55 case Tegra::Shader::ImageType::Texture2DArray:
56 return SurfaceTarget::Texture2DArray;
57 case Tegra::Shader::ImageType::Texture3D:
58 return SurfaceTarget::Texture3D;
59 default:
60 UNREACHABLE();
61 return SurfaceTarget::Texture2D;
62 }
63}
64
65constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
66 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
67}
68
69} // Anonymous namespace
70
71SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
72 const Tegra::Texture::TICEntry& tic,
73 const VideoCommon::Shader::Sampler& entry) {
74 SurfaceParams params;
75 params.is_tiled = tic.IsTiled();
76 params.srgb_conversion = tic.IsSrgbConversionEnabled();
77 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
78 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
79 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
80 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
81 params.pixel_format = lookup_table.GetPixelFormat(
82 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
83 params.type = GetFormatType(params.pixel_format);
84 if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
85 switch (params.pixel_format) {
86 case PixelFormat::R16_UNORM:
87 case PixelFormat::R16_FLOAT:
88 params.pixel_format = PixelFormat::D16_UNORM;
89 break;
90 case PixelFormat::R32_FLOAT:
91 params.pixel_format = PixelFormat::D32_FLOAT;
92 break;
93 default:
94 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
95 static_cast<u32>(params.pixel_format));
96 }
97 params.type = GetFormatType(params.pixel_format);
98 }
99    // TODO: on 1DBuffer we should use the TIC info.
100 if (tic.IsBuffer()) {
101 params.target = SurfaceTarget::TextureBuffer;
102 params.width = tic.Width();
103 params.pitch = params.width * params.GetBytesPerPixel();
104 params.height = 1;
105 params.depth = 1;
106 params.num_levels = 1;
107 params.emulated_levels = 1;
108 params.is_layered = false;
109 } else {
110 params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array);
111 params.width = tic.Width();
112 params.height = tic.Height();
113 params.depth = tic.Depth();
114 params.pitch = params.is_tiled ? 0 : tic.Pitch();
115 if (params.target == SurfaceTarget::TextureCubemap ||
116 params.target == SurfaceTarget::TextureCubeArray) {
117 params.depth *= 6;
118 }
119 params.num_levels = tic.max_mip_level + 1;
120 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
121 params.is_layered = params.IsLayered();
122 }
123 return params;
124}
125
126SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
127 const Tegra::Texture::TICEntry& tic,
128 const VideoCommon::Shader::Image& entry) {
129 SurfaceParams params;
130 params.is_tiled = tic.IsTiled();
131 params.srgb_conversion = tic.IsSrgbConversionEnabled();
132 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
133 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
134 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
135 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
136 params.pixel_format = lookup_table.GetPixelFormat(
137 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
138 params.type = GetFormatType(params.pixel_format);
139 params.target = ImageTypeToSurfaceTarget(entry.type);
140    // TODO: on 1DBuffer we should use the TIC info.
141 if (tic.IsBuffer()) {
142 params.target = SurfaceTarget::TextureBuffer;
143 params.width = tic.Width();
144 params.pitch = params.width * params.GetBytesPerPixel();
145 params.height = 1;
146 params.depth = 1;
147 params.num_levels = 1;
148 params.emulated_levels = 1;
149 params.is_layered = false;
150 } else {
151 params.width = tic.Width();
152 params.height = tic.Height();
153 params.depth = tic.Depth();
154 params.pitch = params.is_tiled ? 0 : tic.Pitch();
155 if (params.target == SurfaceTarget::TextureCubemap ||
156 params.target == SurfaceTarget::TextureCubeArray) {
157 params.depth *= 6;
158 }
159 params.num_levels = tic.max_mip_level + 1;
160 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
161 params.is_layered = params.IsLayered();
162 }
163 return params;
164}
165
166SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
167 const auto& regs = maxwell3d.regs;
168 const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
169 const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
170 const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
171 return {
172 .is_tiled = regs.zeta.memory_layout.type ==
173 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
174 .srgb_conversion = false,
175 .is_layered = is_layered,
176 .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
177 .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
178 .block_depth = block_depth,
179 .tile_width_spacing = 1,
180 .width = regs.zeta_width,
181 .height = regs.zeta_height,
182 .depth = is_layered ? regs.zeta_layers.Value() : 1U,
183 .pitch = 0,
184 .num_levels = 1,
185 .emulated_levels = 1,
186 .pixel_format = pixel_format,
187 .type = GetFormatType(pixel_format),
188 .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
189 };
190}
191
192SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
193 std::size_t index) {
194 const auto& config{maxwell3d.regs.rt[index]};
195 SurfaceParams params;
196 params.is_tiled =
197 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
198 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
199 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
200 params.block_width = config.memory_layout.block_width;
201 params.block_height = config.memory_layout.block_height;
202 params.block_depth = config.memory_layout.block_depth;
203 params.tile_width_spacing = 1;
204 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
205 params.type = GetFormatType(params.pixel_format);
206 if (params.is_tiled) {
207 params.pitch = 0;
208 params.width = config.width;
209 } else {
210 const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
211 params.pitch = config.width;
212 params.width = params.pitch / bpp;
213 }
214 params.height = config.height;
215 params.num_levels = 1;
216 params.emulated_levels = 1;
217
218 if (config.memory_layout.is_3d != 0) {
219 params.depth = config.layers.Value();
220 params.is_layered = false;
221 params.target = SurfaceTarget::Texture3D;
222 } else if (config.layers > 1) {
223 params.depth = config.layers.Value();
224 params.is_layered = true;
225 params.target = SurfaceTarget::Texture2DArray;
226 } else {
227 params.depth = 1;
228 params.is_layered = false;
229 params.target = SurfaceTarget::Texture2D;
230 }
231 return params;
232}
233
234SurfaceParams SurfaceParams::CreateForFermiCopySurface(
235 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
236 const bool is_tiled = !config.linear;
237 const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
238
239 SurfaceParams params{
240 .is_tiled = is_tiled,
241 .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
242 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
243 .is_layered = false,
244 .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
245 .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
246 .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
247 .tile_width_spacing = 1,
248 .width = config.width,
249 .height = config.height,
250 .depth = 1,
251 .pitch = config.pitch,
252 .num_levels = 1,
253 .emulated_levels = 1,
254 .pixel_format = pixel_format,
255 .type = GetFormatType(pixel_format),
256 // TODO(Rodrigo): Try to guess texture arrays from parameters
257 .target = SurfaceTarget::Texture2D,
258 };
259
260 params.is_layered = params.IsLayered();
261 return params;
262}
263
264VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
265 const VideoCommon::Shader::Sampler& entry) {
266 return TextureTypeToSurfaceTarget(entry.type, entry.is_array);
267}
268
269VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
270 const VideoCommon::Shader::Image& entry) {
271 return ImageTypeToSurfaceTarget(entry.type);
272}
273
274bool SurfaceParams::IsLayered() const {
275 switch (target) {
276 case SurfaceTarget::Texture1DArray:
277 case SurfaceTarget::Texture2DArray:
278 case SurfaceTarget::TextureCubemap:
279 case SurfaceTarget::TextureCubeArray:
280 return true;
281 default:
282 return false;
283 }
284}
285
286// Auto block resizing algorithm from:
287// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
288u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
289 if (level == 0) {
290 return this->block_height;
291 }
292
293 const u32 height_new{GetMipHeight(level)};
294 const u32 default_block_height{GetDefaultBlockHeight()};
295 const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
296 const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
297 return std::clamp(block_height_new, 3U, 7U) - 3U;
298}
299
300u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
301 if (level == 0) {
302 return this->block_depth;
303 }
304 if (is_layered) {
305 return 0;
306 }
307
308 const u32 depth_new{GetMipDepth(level)};
309 const u32 block_depth_new = Common::Log2Ceil32(depth_new);
310 if (block_depth_new > 4) {
311 return 5 - (GetMipBlockHeight(level) >= 2);
312 }
313 return block_depth_new;
314}
315
316std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
317 std::size_t offset = 0;
318 for (u32 i = 0; i < level; i++) {
319 offset += GetInnerMipmapMemorySize(i, false, false);
320 }
321 return offset;
322}
323
324std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
325 std::size_t offset = 0;
326 if (is_converted) {
327 for (u32 i = 0; i < level; ++i) {
328 offset += GetConvertedMipmapSize(i) * GetNumLayers();
329 }
330 } else {
331 for (u32 i = 0; i < level; ++i) {
332 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
333 }
334 }
335 return offset;
336}
337
338std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
339 constexpr std::size_t rgba8_bpp = 4ULL;
340 const std::size_t mip_width = GetMipWidth(level);
341 const std::size_t mip_height = GetMipHeight(level);
342 const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
343 return mip_width * mip_height * mip_depth * rgba8_bpp;
344}
345
346std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
347 std::size_t size = 0;
348 for (u32 level = 0; level < num_levels; ++level) {
349 size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
350 }
351 if (is_tiled && is_layered) {
352 return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
353 }
354 return size;
355}
356
357std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
358 bool uncompressed) const {
359 const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
360 const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
361 const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)};
362 if (is_tiled) {
363 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width,
364 mip_height, mip_depth, GetMipBlockHeight(level),
365 GetMipBlockDepth(level));
366 } else if (as_host_size || IsBuffer()) {
367 return GetBytesPerPixel() * mip_width * mip_height * mip_depth;
368 } else {
369 // Linear Texture Case
370 return pitch * mip_height * mip_depth;
371 }
372}
373
374bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
375 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
376 height, depth, pitch, num_levels, pixel_format, type, target) ==
377 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
378 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
379 rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
380}
381
382std::string SurfaceParams::TargetName() const {
383 switch (target) {
384 case SurfaceTarget::Texture1D:
385 return "1D";
386 case SurfaceTarget::TextureBuffer:
387 return "TexBuffer";
388 case SurfaceTarget::Texture2D:
389 return "2D";
390 case SurfaceTarget::Texture3D:
391 return "3D";
392 case SurfaceTarget::Texture1DArray:
393 return "1DArray";
394 case SurfaceTarget::Texture2DArray:
395 return "2DArray";
396 case SurfaceTarget::TextureCubemap:
397 return "Cube";
398 case SurfaceTarget::TextureCubeArray:
399 return "CubeArray";
400 default:
401 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
402 UNREACHABLE();
403 return fmt::format("TUK({})", target);
404 }
405}
406
407u32 SurfaceParams::GetBlockSize() const {
408 const u32 x = 64U << block_width;
409 const u32 y = 8U << block_height;
410 const u32 z = 1U << block_depth;
411 return x * y * z;
412}
413
414std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
415 const u32 x_pixels = 64U / GetBytesPerPixel();
416 const u32 x = x_pixels << block_width;
417 const u32 y = 8U << block_height;
418 return {x, y};
419}
420
421std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
422 const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
423 const u32 block_size = GetBlockSize();
424 const u32 block_index = offset / block_size;
425 const u32 gob_offset = offset % block_size;
426 const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
427 const u32 x_gob_pixels = 64U / GetBytesPerPixel();
428 const u32 x_block_pixels = x_gob_pixels << block_width;
429 const u32 y_block_pixels = 8U << block_height;
430 const u32 z_block_pixels = 1U << block_depth;
431 const u32 x_blocks = div_ceil(width, x_block_pixels);
432 const u32 y_blocks = div_ceil(height, y_block_pixels);
433 const u32 z_blocks = div_ceil(depth, z_block_pixels);
434 const u32 base_x = block_index % x_blocks;
435 const u32 base_y = (block_index / x_blocks) % y_blocks;
436 const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
437 u32 x = base_x * x_block_pixels;
438 u32 y = base_y * y_block_pixels;
439 u32 z = base_z * z_block_pixels;
440 z += gob_index >> block_height;
441 y += (gob_index * 8U) % y_block_pixels;
442 return {x, y, z};
443}
444
445} // namespace VideoCommon
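GetBlockOffsetXYZ at the end reverses the block-linear layout: the offset first selects a block (blocks tile the surface x-fastest, then y, then z), and the GOB index inside that block then refines y and z. A worked example with assumed parameters:

// Assume GetBytesPerPixel() == 4, block_width == block_height == block_depth == 0,
// and a 64-pixel-wide surface:
//   block_size     = (64 << 0) * (8 << 0) * (1 << 0) = 512 bytes (one GOB)
//   x_block_pixels = (64 / 4) << 0 = 16, so x_blocks = 64 / 16 = 4
// For offset = 0x600: block_index = 3, gob_offset = 0, gob_index = 0,
//   base_x = 3 % 4 = 3 -> (x, y, z) = (48, 0, 0):
// byte 0x600 is the start of the fourth 16x8-pixel block in the first block row.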
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
deleted file mode 100644
index 4466c3c34..000000000
--- a/src/video_core/texture_cache/surface_params.h
+++ /dev/null
@@ -1,294 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "common/cityhash.h"
12#include "common/common_types.h"
13#include "video_core/engines/fermi_2d.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/surface.h"
17#include "video_core/textures/decoders.h"
18
19namespace VideoCommon {
20
21class FormatLookupTable;
22
23class SurfaceParams {
24public:
25 /// Creates SurfaceParams from a texture configuration.
26 static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
27 const Tegra::Texture::TICEntry& tic,
28 const VideoCommon::Shader::Sampler& entry);
29
30 /// Creates SurfaceParams from an image configuration.
31 static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
32 const Tegra::Texture::TICEntry& tic,
33 const VideoCommon::Shader::Image& entry);
34
35 /// Creates SurfaceParams for a depth buffer configuration.
36 static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
37
38 /// Creates SurfaceParams from a framebuffer configuration.
39 static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
40 std::size_t index);
41
42 /// Creates SurfaceParams from a Fermi2D surface configuration.
43 static SurfaceParams CreateForFermiCopySurface(
44 const Tegra::Engines::Fermi2D::Regs::Surface& config);
45
46 /// Obtains the texture target from a shader's sampler entry.
47 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
48 const VideoCommon::Shader::Sampler& entry);
49
50 /// Obtains the texture target from a shader's sampler entry.
51 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
52 const VideoCommon::Shader::Image& entry);
53
54 std::size_t Hash() const {
55 return static_cast<std::size_t>(
56 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
57 }
58
59 bool operator==(const SurfaceParams& rhs) const;
60
61 bool operator!=(const SurfaceParams& rhs) const {
62 return !operator==(rhs);
63 }
64
65 std::size_t GetGuestSizeInBytes() const {
66 return GetInnerMemorySize(false, false, false);
67 }
68
69 std::size_t GetHostSizeInBytes(bool is_converted) const {
70 if (!is_converted) {
71 return GetInnerMemorySize(true, false, false);
72 }
73 // ASTC is decompressed in software and emulated as RGBA8
74 std::size_t host_size_in_bytes = 0;
75 for (u32 level = 0; level < num_levels; ++level) {
76 host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
77 }
78 return host_size_in_bytes;
79 }
80
81 u32 GetBlockAlignedWidth() const {
82 return Common::AlignUp(width, 64 / GetBytesPerPixel());
83 }
84
85 /// Returns the width of a given mipmap level.
86 u32 GetMipWidth(u32 level) const {
87 return std::max(1U, width >> level);
88 }
89
90 /// Returns the height of a given mipmap level.
91 u32 GetMipHeight(u32 level) const {
92 return std::max(1U, height >> level);
93 }
94
95 /// Returns the depth of a given mipmap level.
96 u32 GetMipDepth(u32 level) const {
97 return is_layered ? depth : std::max(1U, depth >> level);
98 }
99
100 /// Returns the block height of a given mipmap level.
101 u32 GetMipBlockHeight(u32 level) const;
102
103 /// Returns the block depth of a given mipmap level.
104 u32 GetMipBlockDepth(u32 level) const;
105
106 /// Returns the best possible row/pitch alignment for the surface.
107 u32 GetRowAlignment(u32 level, bool is_converted) const {
108 const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
109 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
110 }
111
112 /// Returns the offset in bytes in guest memory of a given mipmap level.
113 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
114
115 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
116 std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
117
118 /// Returns the size in bytes in guest memory of a given mipmap level.
119 std::size_t GetGuestMipmapSize(u32 level) const {
120 return GetInnerMipmapMemorySize(level, false, false);
121 }
122
123 /// Returns the size in bytes in host memory (linear) of a given mipmap level.
124 std::size_t GetHostMipmapSize(u32 level) const {
125 return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
126 }
127
128 std::size_t GetConvertedMipmapSize(u32 level) const;
129
130 /// Gets this texture's Tegra block size in the guest memory layout
131 u32 GetBlockSize() const;
132
133 /// Gets the maximum X, Y sizes of a single block.
134 std::pair<u32, u32> GetBlockXY() const;
135
136 /// Gets the x, y, z coordinates corresponding to a given memory offset.
137 std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
138
139 /// Returns the size of a layer in bytes in guest memory.
140 std::size_t GetGuestLayerSize() const {
141 return GetLayerSize(false, false);
142 }
143
144 /// Returns the size of a layer in bytes in host memory for a given mipmap level.
145 std::size_t GetHostLayerSize(u32 level) const {
146 ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
147 return GetInnerMipmapMemorySize(level, true, false);
148 }
149
150 /// Returns the maximum number of mipmap levels the texture can have on the host GPU
151 u32 MaxPossibleMipmap() const {
152 const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
153 const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
154 const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
155 if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
156 return max_mipmap;
157 return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
158 }
159
160 /// Returns true if the guest surface is a compressed surface.
161 bool IsCompressed() const {
162 return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
163 }
164
165 /// Returns the default block width.
166 u32 GetDefaultBlockWidth() const {
167 return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
168 }
169
170 /// Returns the default block height.
171 u32 GetDefaultBlockHeight() const {
172 return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
173 }
174
175 /// Returns the bits per pixel.
176 u32 GetBitsPerPixel() const {
177 return VideoCore::Surface::GetFormatBpp(pixel_format);
178 }
179
180 /// Returns the bytes per pixel.
181 u32 GetBytesPerPixel() const {
182 return VideoCore::Surface::GetBytesPerPixel(pixel_format);
183 }
184
185 /// Returns true if the pixel format is a depth and/or stencil format.
186 bool IsPixelFormatZeta() const {
187 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
188 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
189 }
190
191 /// Returns true if the surface is a TextureBuffer type of surface.
192 bool IsBuffer() const {
193 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
194 }
195
196 /// Returns the number of layers in the surface.
197 std::size_t GetNumLayers() const {
198 return is_layered ? depth : 1;
199 }
200
201 /// Returns the debug name of the texture for use in graphic debuggers.
202 std::string TargetName() const;
203
204 // Helper used for out-of-class size calculations
205 static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
206 const u32 block_depth) {
207 return Common::AlignBits(out_size,
208 Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
209 }
210
211 /// Converts a width from one surface format into another. This helps represent the
212 /// equivalent value between compressed and non-compressed textures.
213 static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
214 VideoCore::Surface::PixelFormat pixel_format_to) {
215 const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
216 const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
217 return (width * bw2 + bw1 - 1) / bw1;
218 }
219
220 /// Converts a height from one surface format into another. This helps represent the
221 /// equivalent value between compressed and non-compressed textures.
222 static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
223 VideoCore::Surface::PixelFormat pixel_format_to) {
224 const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
225 const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
226 return (height * bh2 + bh1 - 1) / bh1;
227 }
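
As a worked example of the rounding, take a hypothetical copy between a BC-style compressed format with 4x4 blocks and a linear format with 1x1 blocks. Converting a width of 256 from the compressed to the linear format gives (256 * 1 + 4 - 1) / 4 = 64, and converting that 64 back gives (64 * 4 + 1 - 1) / 1 = 256; the + bw1 - 1 term rounds up so that widths that are not multiples of the block size still cover the last partial block.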
228
229 // Finds the maximum possible width between two 2D layers of different formats
230 static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
231 const u32 src_level, const u32 dst_level) {
232 const u32 bw1 = src_params.GetDefaultBlockWidth();
233 const u32 bw2 = dst_params.GetDefaultBlockWidth();
234 const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
235 const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
236 return std::min(t_src_width, t_dst_width);
237 }
238
239 // Finds the maximum possible height between two 2D layers of different formats
240 static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
241 const u32 src_level, const u32 dst_level) {
242 const u32 bh1 = src_params.GetDefaultBlockHeight();
243 const u32 bh2 = dst_params.GetDefaultBlockHeight();
244 const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
245 const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
246 return std::min(t_src_height, t_dst_height);
247 }
248
249 bool is_tiled;
250 bool srgb_conversion;
251 bool is_layered;
252 u32 block_width;
253 u32 block_height;
254 u32 block_depth;
255 u32 tile_width_spacing;
256 u32 width;
257 u32 height;
258 u32 depth;
259 u32 pitch;
260 u32 num_levels;
261 u32 emulated_levels;
262 VideoCore::Surface::PixelFormat pixel_format;
263 VideoCore::Surface::SurfaceType type;
264 VideoCore::Surface::SurfaceTarget target;
265
266private:
267 /// Returns the size of a given mipmap level inside a layer.
268 std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
269
270 /// Returns the size of all mipmap levels and aligns as needed.
271 std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
272 return GetLayerSize(as_host_size, uncompressed) *
273 (layer_only ? 1U : (is_layered ? depth : 1U));
274 }
275
276 /// Returns the size of a layer
277 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
278
279 /// Returns true if these parameters are from a layered surface.
280 bool IsLayered() const;
281};
282
283} // namespace VideoCommon
284
285namespace std {
286
287template <>
288struct hash<VideoCommon::SurfaceParams> {
289 std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
290 return k.Hash();
291 }
292};
293
294} // namespace std
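
With operator== and the std::hash specialization above, SurfaceParams works directly as a key in standard unordered containers, which is how the cache could reserve and look up surfaces by their parameters. A minimal sketch of the pattern, with an int standing in for a hypothetical surface handle:

std::unordered_map<VideoCommon::SurfaceParams, int> surface_lookup;
surface_lookup.emplace(params, 42); // keyed through SurfaceParams::Hash(), compared with operator==

One caveat of hashing sizeof(*this) raw bytes through CityHash64 is that padding bytes are hashed too, so equal-valued instances only hash equally when the objects were fully zero-initialized before their fields were set.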
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
deleted file mode 100644
index 6b5f5984b..000000000
--- a/src/video_core/texture_cache/surface_view.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/common_types.h"
8#include "video_core/texture_cache/surface_view.h"
9
10namespace VideoCommon {
11
12std::size_t ViewParams::Hash() const {
13 return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
14 (static_cast<std::size_t>(base_level) << 24) ^
15 (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
16}
17
18bool ViewParams::operator==(const ViewParams& rhs) const {
19 return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
20 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
21}
22
23bool ViewParams::operator!=(const ViewParams& rhs) const {
24 return !operator==(rhs);
25}
26
27} // namespace VideoCommon
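
The hash packs the five fields at bit offsets 0, 16, 24, 32 and 36 and folds them with XOR. For a hypothetical cubemap view with base_layer = 1, num_layers = 6, base_level = 0 and num_levels = 1, that evaluates to 1 ^ (6 << 16) ^ (0 << 24) ^ (1ull << 32) ^ (target << 36). Note the layout assumes std::size_t is 64 bits wide; with a 32-bit size_t the << 32 and << 36 shifts would be undefined behavior.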
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
deleted file mode 100644
index 199f72732..000000000
--- a/src/video_core/texture_cache/surface_view.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8
9#include "common/common_types.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/surface_params.h"
12
13namespace VideoCommon {
14
15struct ViewParams {
16 constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_,
17 u32 num_layers_, u32 base_level_, u32 num_levels_)
18 : target{target_}, base_layer{base_layer_}, num_layers{num_layers_},
19 base_level{base_level_}, num_levels{num_levels_} {}
20
21 std::size_t Hash() const;
22
23 bool operator==(const ViewParams& rhs) const;
24 bool operator!=(const ViewParams& rhs) const;
25
26 bool IsLayered() const {
27 switch (target) {
28 case VideoCore::Surface::SurfaceTarget::Texture1DArray:
29 case VideoCore::Surface::SurfaceTarget::Texture2DArray:
30 case VideoCore::Surface::SurfaceTarget::TextureCubemap:
31 case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
32 return true;
33 default:
34 return false;
35 }
36 }
37
38 VideoCore::Surface::SurfaceTarget target{};
39 u32 base_layer{};
40 u32 num_layers{};
41 u32 base_level{};
42 u32 num_levels{};
43};
44
45class ViewBase {
46public:
47 constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {}
48
49 constexpr const ViewParams& GetViewParams() const {
50 return params;
51 }
52
53protected:
54 ViewParams params;
55};
56
57} // namespace VideoCommon
58
59namespace std {
60
61template <>
62struct hash<VideoCommon::ViewParams> {
63 std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
64 return k.Hash();
65 }
66};
67
68} // namespace std
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 581d8dd5b..d1080300f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,1298 +6,1454 @@
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9#include <list> 9#include <bit>
10#include <memory> 10#include <memory>
11#include <mutex> 11#include <mutex>
12#include <set> 12#include <optional>
13#include <tuple> 13#include <span>
14#include <type_traits>
14#include <unordered_map> 15#include <unordered_map>
16#include <utility>
15#include <vector> 17#include <vector>
16 18
17#include <boost/container/small_vector.hpp> 19#include <boost/container/small_vector.hpp>
18#include <boost/icl/interval_map.hpp>
19#include <boost/range/iterator_range.hpp>
20 20
21#include "common/assert.h" 21#include "common/alignment.h"
22#include "common/common_funcs.h"
22#include "common/common_types.h" 23#include "common/common_types.h"
23#include "common/math_util.h" 24#include "common/logging/log.h"
24#include "core/core.h"
25#include "core/memory.h"
26#include "core/settings.h"
27#include "video_core/compatible_formats.h" 25#include "video_core/compatible_formats.h"
26#include "video_core/delayed_destruction_ring.h"
28#include "video_core/dirty_flags.h" 27#include "video_core/dirty_flags.h"
29#include "video_core/engines/fermi_2d.h" 28#include "video_core/engines/fermi_2d.h"
29#include "video_core/engines/kepler_compute.h"
30#include "video_core/engines/maxwell_3d.h" 30#include "video_core/engines/maxwell_3d.h"
31#include "video_core/gpu.h"
32#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
33#include "video_core/rasterizer_interface.h" 32#include "video_core/rasterizer_interface.h"
34#include "video_core/surface.h" 33#include "video_core/surface.h"
35#include "video_core/texture_cache/copy_params.h" 34#include "video_core/texture_cache/descriptor_table.h"
36#include "video_core/texture_cache/format_lookup_table.h" 35#include "video_core/texture_cache/format_lookup_table.h"
37#include "video_core/texture_cache/surface_base.h" 36#include "video_core/texture_cache/formatter.h"
38#include "video_core/texture_cache/surface_params.h" 37#include "video_core/texture_cache/image_base.h"
39#include "video_core/texture_cache/surface_view.h" 38#include "video_core/texture_cache/image_info.h"
40 39#include "video_core/texture_cache/image_view_base.h"
41namespace Tegra::Texture { 40#include "video_core/texture_cache/image_view_info.h"
42struct FullTextureInfo; 41#include "video_core/texture_cache/render_targets.h"
43} 42#include "video_core/texture_cache/samples_helper.h"
44 43#include "video_core/texture_cache/slot_vector.h"
45namespace VideoCore { 44#include "video_core/texture_cache/types.h"
46class RasterizerInterface; 45#include "video_core/texture_cache/util.h"
47} 46#include "video_core/textures/texture.h"
48 47
49namespace VideoCommon { 48namespace VideoCommon {
50 49
51using VideoCore::Surface::FormatCompatibility; 50using Tegra::Texture::SwizzleSource;
51using Tegra::Texture::TextureType;
52using Tegra::Texture::TICEntry;
53using Tegra::Texture::TSCEntry;
54using VideoCore::Surface::GetFormatType;
55using VideoCore::Surface::IsCopyCompatible;
52using VideoCore::Surface::PixelFormat; 56using VideoCore::Surface::PixelFormat;
53using VideoCore::Surface::SurfaceTarget; 57using VideoCore::Surface::PixelFormatFromDepthFormat;
54using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 58using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
59using VideoCore::Surface::SurfaceType;
55 60
56template <typename TSurface, typename TView> 61template <class P>
57class TextureCache { 62class TextureCache {
58 using VectorSurface = boost::container::small_vector<TSurface, 1>; 63 /// Address shift for caching images into a hash table
64 static constexpr u64 PAGE_BITS = 20;
65
66 /// Enables debugging features to the texture cache
67 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
68 /// Implement blits as copies between framebuffers
69 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
70 /// True when some copies have to be emulated
71 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
72
73 /// Image view ID for null descriptors
74 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
75 /// Sampler ID for invalid sampler indices
76 static constexpr SamplerId NULL_SAMPLER_ID{0};
77
78 using Runtime = typename P::Runtime;
79 using Image = typename P::Image;
80 using ImageAlloc = typename P::ImageAlloc;
81 using ImageView = typename P::ImageView;
82 using Sampler = typename P::Sampler;
83 using Framebuffer = typename P::Framebuffer;
84
85 struct BlitImages {
86 ImageId dst_id;
87 ImageId src_id;
88 PixelFormat dst_format;
89 PixelFormat src_format;
90 };
91
92 template <typename T>
93 struct IdentityHash {
94 [[nodiscard]] size_t operator()(T value) const noexcept {
95 return static_cast<size_t>(value);
96 }
97 };
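
IdentityHash exists because the page table below is keyed by page indices already derived from addresses, so hashing the integer key again buys nothing. A sketch of the lookup pattern, using the PAGE_BITS constant and page_table member declared in this class:

const u64 page = cpu_addr >> PAGE_BITS; // 1 MiB pages, since PAGE_BITS = 20
const auto it = page_table.find(page);  // IdentityHash<u64> returns the page index unchanged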
59 98
60public: 99public:
61 void InvalidateRegion(VAddr addr, std::size_t size) { 100 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
62 std::lock_guard lock{mutex}; 101 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
63 102
64 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 103 /// Notify the cache that a new frame has been queued
65 Unregister(surface); 104 void TickFrame();
66 }
67 }
68 105
69 void OnCPUWrite(VAddr addr, std::size_t size) { 106 /// Return an unique mutually exclusive lock for the cache
70 std::lock_guard lock{mutex}; 107 [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
71 108
72 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 109 /// Return a constant reference to the given image view id
73 if (surface->IsMemoryMarked()) { 110 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
74 UnmarkMemory(surface);
75 surface->SetSyncPending(true);
76 marked_for_unregister.emplace_back(surface);
77 }
78 }
79 }
80 111
81 void SyncGuestHost() { 112 /// Return a reference to the given image view id
82 std::lock_guard lock{mutex}; 113 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
83 114
84 for (const auto& surface : marked_for_unregister) { 115 /// Fill image_view_ids with the graphics images in indices
85 if (surface->IsRegistered()) { 116 void FillGraphicsImageViews(std::span<const u32> indices,
86 surface->SetSyncPending(false); 117 std::span<ImageViewId> image_view_ids);
87 Unregister(surface);
88 }
89 }
90 marked_for_unregister.clear();
91 }
92 118
93 /** 119 /// Fill image_view_ids with the compute images in indices
94 * Guarantees that rendertargets don't unregister themselves if the 120 void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
95 * collide. Protection is currently only done on 3D slices.
96 */
97 void GuardRenderTargets(bool new_guard) {
98 guard_render_targets = new_guard;
99 }
100 121
101 void GuardSamplers(bool new_guard) { 122 /// Get the sampler from the graphics descriptor table in the specified index
102 guard_samplers = new_guard; 123 Sampler* GetGraphicsSampler(u32 index);
103 }
104 124
105 void FlushRegion(VAddr addr, std::size_t size) { 125 /// Get the sampler from the compute descriptor table in the specified index
106 std::lock_guard lock{mutex}; 126 Sampler* GetComputeSampler(u32 index);
107 127
108 auto surfaces = GetSurfacesInRegion(addr, size); 128 /// Refresh the state for graphics image view and sampler descriptors
109 if (surfaces.empty()) { 129 void SynchronizeGraphicsDescriptors();
110 return;
111 }
112 std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
113 return a->GetModificationTick() < b->GetModificationTick();
114 });
115 for (const auto& surface : surfaces) {
116 mutex.unlock();
117 FlushSurface(surface);
118 mutex.lock();
119 }
120 }
121 130
122 bool MustFlushRegion(VAddr addr, std::size_t size) { 131 /// Refresh the state for compute image view and sampler descriptors
123 std::lock_guard lock{mutex}; 132 void SynchronizeComputeDescriptors();
124 133
125 const auto surfaces = GetSurfacesInRegion(addr, size); 134 /// Update bound render targets and upload memory if necessary
126 return std::any_of(surfaces.cbegin(), surfaces.cend(), 135 /// @param is_clear True when the render targets are being used for clears
127 [](const TSurface& surface) { return surface->IsModified(); }); 136 void UpdateRenderTargets(bool is_clear);
128 }
129 137
130 TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, 138 /// Find a framebuffer with the currently bound render targets
131 const VideoCommon::Shader::Sampler& entry) { 139 /// UpdateRenderTargets should be called before this
132 std::lock_guard lock{mutex}; 140 Framebuffer* GetFramebuffer();
133 const auto gpu_addr{tic.Address()};
134 if (!gpu_addr) {
135 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
136 }
137 141
138 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 142 /// Mark images in a range as modified from the CPU
139 if (!cpu_addr) { 143 void WriteMemory(VAddr cpu_addr, size_t size);
140 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
141 }
142 144
143 if (!IsTypeCompatible(tic.texture_type, entry)) { 145 /// Download contents of host images to guest memory in a region
144 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 146 void DownloadMemory(VAddr cpu_addr, size_t size);
145 }
146 147
147 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 148 /// Remove images in a region
148 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 149 void UnmapMemory(VAddr cpu_addr, size_t size);
149 if (guard_samplers) {
150 sampled_textures.push_back(surface);
151 }
152 return view;
153 }
154 150
155 TView GetImageSurface(const Tegra::Texture::TICEntry& tic, 151 /// Blit an image with the given parameters
156 const VideoCommon::Shader::Image& entry) { 152 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
157 std::lock_guard lock{mutex}; 153 const Tegra::Engines::Fermi2D::Surface& src,
158 const auto gpu_addr{tic.Address()}; 154 const Tegra::Engines::Fermi2D::Config& copy);
159 if (!gpu_addr) {
160 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
161 }
162 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
163 if (!cpu_addr) {
164 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
165 }
166 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
167 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
168 if (guard_samplers) {
169 sampled_textures.push_back(surface);
170 }
171 return view;
172 }
173 155
174 bool TextureBarrier() { 156 /// Invalidate the contents of the color buffer index
175 const bool any_rt = 157 /// These contents become unspecified, so the cache can assume aggressive optimizations.
176 std::any_of(sampled_textures.begin(), sampled_textures.end(), 158 void InvalidateColorBuffer(size_t index);
177 [](const auto& surface) { return surface->IsRenderTarget(); });
178 sampled_textures.clear();
179 return any_rt;
180 }
181 159
182 TView GetDepthBufferSurface(bool preserve_contents) { 160 /// Invalidate the contents of the depth buffer
183 std::lock_guard lock{mutex}; 161 /// These contents become unspecified, so the cache can assume aggressive optimizations.
184 auto& dirty = maxwell3d.dirty; 162 void InvalidateDepthBuffer();
185 if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
186 return depth_buffer.view;
187 }
188 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
189 163
190 const auto& regs{maxwell3d.regs}; 164 /// Try to find a cached image view in the given CPU address
191 const auto gpu_addr{regs.zeta.Address()}; 165 [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
192 if (!gpu_addr || !regs.zeta_enable) {
193 SetEmptyDepthBuffer();
194 return {};
195 }
196 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
197 if (!cpu_addr) {
198 SetEmptyDepthBuffer();
199 return {};
200 }
201 const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
202 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
203 if (depth_buffer.target)
204 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
205 depth_buffer.target = surface_view.first;
206 depth_buffer.view = surface_view.second;
207 if (depth_buffer.target)
208 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
209 return surface_view.second;
210 }
211
212 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
213 std::lock_guard lock{mutex};
214 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
215 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
216 return render_targets[index].view;
217 }
218 maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
219 166
220 const auto& regs{maxwell3d.regs}; 167 /// Return true when there are uncommitted images to be downloaded
221 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 168 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
222 regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
223 SetEmptyColorBuffer(index);
224 return {};
225 }
226 169
227 const auto& config{regs.rt[index]}; 170 /// Return true when the caller should wait for async downloads
228 const auto gpu_addr{config.Address()}; 171 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
229 if (!gpu_addr) {
230 SetEmptyColorBuffer(index);
231 return {};
232 }
233 172
234 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 173 /// Commit asynchronous downloads
235 if (!cpu_addr) { 174 void CommitAsyncFlushes();
236 SetEmptyColorBuffer(index); 175
237 return {}; 176 /// Pop asynchronous downloads
238 } 177 void PopAsyncFlushes();
178
179 /// Return true when a CPU region is modified from the GPU
180 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
239 181
240 auto surface_view = 182private:
241 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), 183 /// Iterate over all page indices in a range
242 preserve_contents, true); 184 template <typename Func>
243 if (render_targets[index].target) { 185 static void ForEachPage(VAddr addr, size_t size, Func&& func) {
244 auto& surface = render_targets[index].target; 186 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
245 surface->MarkAsRenderTarget(false, NO_RT); 187 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
246 const auto& cr_params = surface->GetSurfaceParams(); 188 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
247 if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 189 if constexpr (RETURNS_BOOL) {
248 AsyncFlushSurface(surface); 190 if (func(page)) {
191 break;
192 }
193 } else {
194 func(page);
249 } 195 }
250 } 196 }
251 render_targets[index].target = surface_view.first;
252 render_targets[index].view = surface_view.second;
253 if (render_targets[index].target)
254 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
255 return surface_view.second;
256 } 197 }
257 198
258 void MarkColorBufferInUse(std::size_t index) { 199 /// Fills image_view_ids with the image views at the given indices
259 if (auto& render_target = render_targets[index].target) { 200 void FillImageViews(DescriptorTable<TICEntry>& table,
260 render_target->MarkAsModified(true, Tick()); 201 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
261 } 202 std::span<ImageViewId> image_view_ids);
262 }
263 203
264 void MarkDepthBufferInUse() { 204 /// Find or create an image view in the guest descriptor table
265 if (depth_buffer.target) { 205 ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
266 depth_buffer.target->MarkAsModified(true, Tick()); 206 std::span<ImageViewId> cached_image_view_ids, u32 index);
267 }
268 }
269 207
270 void SetEmptyDepthBuffer() { 208 /// Find or create a framebuffer with the given render target parameters
271 if (depth_buffer.target == nullptr) { 209 FramebufferId GetFramebufferId(const RenderTargets& key);
272 return;
273 }
274 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
275 depth_buffer.target = nullptr;
276 depth_buffer.view = nullptr;
277 }
278 210
279 void SetEmptyColorBuffer(std::size_t index) { 211 /// Refresh the contents (pixel data) of an image
280 if (render_targets[index].target == nullptr) { 212 void RefreshContents(Image& image);
281 return;
282 }
283 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
284 render_targets[index].target = nullptr;
285 render_targets[index].view = nullptr;
286 }
287
288 void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
289 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
290 const Tegra::Engines::Fermi2D::Config& copy_config) {
291 std::lock_guard lock{mutex};
292 SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
293 SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
294 const GPUVAddr src_gpu_addr = src_config.Address();
295 const GPUVAddr dst_gpu_addr = dst_config.Address();
296 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
297
298 const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
299 const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
300 std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
301 TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
302 ImageBlit(src_surface, dst_surface.second, copy_config);
303 dst_surface.first->MarkAsModified(true, Tick());
304 }
305
306 TSurface TryFindFramebufferSurface(VAddr addr) const {
307 if (!addr) {
308 return nullptr;
309 }
310 const VAddr page = addr >> registry_page_bits;
311 const auto it = registry.find(page);
312 if (it == registry.end()) {
313 return nullptr;
314 }
315 const auto& list = it->second;
316 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
317 return surface->GetCpuAddr() == addr;
318 });
319 return found != list.end() ? *found : nullptr;
320 }
321 213
322 u64 Tick() { 214 /// Upload data from guest to an image
323 return ++ticks; 215 template <typename MapBuffer>
324 } 216 void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
325 217
326 void CommitAsyncFlushes() { 218 /// Find or create an image view from a guest descriptor
327 committed_flushes.push_back(uncommitted_flushes); 219 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
328 uncommitted_flushes.reset();
329 }
330 220
331 bool HasUncommittedFlushes() const { 221 /// Create a new image view from a guest descriptor
332 return uncommitted_flushes != nullptr; 222 [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
333 }
334 223
335 bool ShouldWaitAsyncFlushes() const { 224 /// Find or create an image from the given parameters
336 return !committed_flushes.empty() && committed_flushes.front() != nullptr; 225 [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
337 } 226 RelaxedOptions options = RelaxedOptions{});
338 227
339 void PopAsyncFlushes() { 228 /// Find an image from the given parameters
340 if (committed_flushes.empty()) { 229 [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
341 return; 230 RelaxedOptions options);
342 }
343 auto& flush_list = committed_flushes.front();
344 if (!flush_list) {
345 committed_flushes.pop_front();
346 return;
347 }
348 for (TSurface& surface : *flush_list) {
349 FlushSurface(surface);
350 }
351 committed_flushes.pop_front();
352 }
353 231
354protected: 232 /// Create an image from the given parameters
355 explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, 233 [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
356 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 234 RelaxedOptions options);
357 bool is_astc_supported_)
358 : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
359 gpu_memory{gpu_memory_} {
360 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
361 SetEmptyColorBuffer(i);
362 }
363 235
364 SetEmptyDepthBuffer(); 236 /// Create a new image and join perfectly matching existing images
365 staging_cache.SetSize(2); 237 /// Remove joined images from the cache
238 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
366 239
367 const auto make_siblings = [this](PixelFormat a, PixelFormat b) { 240 /// Return a blit image pair from the given guest blit parameters
368 siblings_table[static_cast<std::size_t>(a)] = b; 241 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
369 siblings_table[static_cast<std::size_t>(b)] = a; 242 const Tegra::Engines::Fermi2D::Surface& src);
370 };
371 std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
372 make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
373 make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
374 make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
375 243
376 sampled_textures.reserve(64); 244 /// Find or create a sampler from a guest descriptor sampler
377 } 245 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
378 246
379 ~TextureCache() = default; 247 /// Find or create an image view for the given color buffer index
248 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
380 249
381 virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; 250 /// Find or create an image view for the depth buffer
251 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
382 252
383 virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, 253 /// Find or create a view for a render target with the given image parameters
384 const CopyParams& copy_params) = 0; 254 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
255 bool is_clear);
385 256
386 virtual void ImageBlit(TView& src_view, TView& dst_view, 257 /// Iterates over all the images in a region calling func
387 const Tegra::Engines::Fermi2D::Config& copy_config) = 0; 258 template <typename Func>
259 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
388 260
389 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture 261 /// Find or create an image view in the given image with the passed parameters
390 // and reading it from a separate buffer. 262 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
391 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
392 263
393 void ManageRenderTargetUnregister(TSurface& surface) { 264 /// Register image in the page table
394 auto& dirty = maxwell3d.dirty; 265 void RegisterImage(ImageId image);
395 const u32 index = surface->GetRenderTarget(); 266
396 if (index == DEPTH_RT) { 267 /// Unregister image from the page table
397 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; 268 void UnregisterImage(ImageId image);
398 } else { 269
399 dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; 270 /// Track CPU reads and writes for image
400 } 271 void TrackImage(ImageBase& image);
401 dirty.flags[VideoCommon::Dirty::RenderTargets] = true; 272
273 /// Stop tracking CPU reads and writes for image
274 void UntrackImage(ImageBase& image);
275
276 /// Delete image from the cache
277 void DeleteImage(ImageId image);
278
279 /// Remove image views references from the cache
280 void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
281
282 /// Remove framebuffers using the given image views from the cache
283 void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
284
285 /// Mark an image as modified from the GPU
286 void MarkModification(ImageBase& image) noexcept;
287
288 /// Synchronize image aliases, copying data if needed
289 void SynchronizeAliases(ImageId image_id);
290
291 /// Prepare an image to be used
292 void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
293
294 /// Prepare an image view to be used
295 void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
296
297 /// Execute copies from one image to the other, even if they are incompatible
298 void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
299
300 /// Bind an image view as render target, downloading resources preemptively if needed
301 void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
302
303 /// Create a render target from a given image and image view parameters
304 [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
305 ImageId, const ImageViewInfo& view_info);
306
307 /// Returns true if the current clear parameters clear the whole image of a given image view
308 [[nodiscard]] bool IsFullClear(ImageViewId id);
309
310 Runtime& runtime;
311 VideoCore::RasterizerInterface& rasterizer;
312 Tegra::Engines::Maxwell3D& maxwell3d;
313 Tegra::Engines::KeplerCompute& kepler_compute;
314 Tegra::MemoryManager& gpu_memory;
315
316 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
317 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
318 std::vector<SamplerId> graphics_sampler_ids;
319 std::vector<ImageViewId> graphics_image_view_ids;
320
321 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
322 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
323 std::vector<SamplerId> compute_sampler_ids;
324 std::vector<ImageViewId> compute_image_view_ids;
325
326 RenderTargets render_targets;
327
328 std::mutex mutex;
329
330 std::unordered_map<TICEntry, ImageViewId> image_views;
331 std::unordered_map<TSCEntry, SamplerId> samplers;
332 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
333
334 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
335
336 bool has_deleted_images = false;
337
338 SlotVector<Image> slot_images;
339 SlotVector<ImageView> slot_image_views;
340 SlotVector<ImageAlloc> slot_image_allocs;
341 SlotVector<Sampler> slot_samplers;
342 SlotVector<Framebuffer> slot_framebuffers;
343
344 // TODO: This data structure is not optimal and it should be reworked
345 std::vector<ImageId> uncommitted_downloads;
346 std::queue<std::vector<ImageId>> committed_downloads;
347
348 static constexpr size_t TICKS_TO_DESTROY = 6;
349 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
350 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
351 DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
352
353 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
354
355 u64 modification_tick = 0;
356 u64 frame_tick = 0;
357};
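
Each backend instantiates TextureCache<P> with a policy struct that supplies the types and flags referenced at the top of the class. A minimal compiling sketch of that shape, with every name hypothetical and empty structs standing in for real backend objects:

struct NullPolicy {
    static constexpr bool ENABLE_VALIDATION = false;   // extra debug checks
    static constexpr bool FRAMEBUFFER_BLITS = false;   // blits implemented as framebuffer copies
    static constexpr bool HAS_EMULATED_COPIES = false; // some copies need emulation
    struct Runtime {};     // owns backend state, maps staging buffers
    struct Image {};       // backend texture
    struct ImageAlloc {};  // memory allocation backing one or more images
    struct ImageView {};   // backend texture view
    struct Sampler {};     // backend sampler object
    struct Framebuffer {}; // backend framebuffer / render pass object
};
// Usage: TextureCache<NullPolicy> cache{runtime, rasterizer, maxwell3d, kepler_compute, gpu_memory};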
358
359template <class P>
360TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
361 Tegra::Engines::Maxwell3D& maxwell3d_,
362 Tegra::Engines::KeplerCompute& kepler_compute_,
363 Tegra::MemoryManager& gpu_memory_)
364 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
365 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
366 // Configure null sampler
367 TSCEntry sampler_descriptor{};
368 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
369 sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
370 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
371 sampler_descriptor.cubemap_anisotropy.Assign(1);
372
373 // Make sure the first index is reserved for the null resources
374 // This way the null resource becomes a compile time constant
375 void(slot_image_views.insert(runtime, NullImageParams{}));
376 void(slot_samplers.insert(runtime, sampler_descriptor));
377}
378
379template <class P>
380void TextureCache<P>::TickFrame() {
381 // Tick sentenced resources in this order to ensure they are destroyed in the right order
382 sentenced_images.Tick();
383 sentenced_framebuffers.Tick();
384 sentenced_image_view.Tick();
385 ++frame_tick;
386}
387
388template <class P>
389std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
390 return std::unique_lock{mutex};
391}
392
393template <class P>
394const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
395 return slot_image_views[id];
396}
397
398template <class P>
399typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
400 return slot_image_views[id];
401}
402
403template <class P>
404void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
405 std::span<ImageViewId> image_view_ids) {
406 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
407}
408
409template <class P>
410void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
411 std::span<ImageViewId> image_view_ids) {
412 FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
413}
414
415template <class P>
416typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
417 [[unlikely]] if (index > graphics_sampler_table.Limit()) {
418 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
419 return &slot_samplers[NULL_SAMPLER_ID];
420 }
421 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
422 SamplerId& id = graphics_sampler_ids[index];
423 [[unlikely]] if (is_new) {
424 id = FindSampler(descriptor);
402 } 425 }
426 return &slot_samplers[id];
427}
403 428
404 void Register(TSurface surface) { 429template <class P>
405 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 430typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
406 const std::size_t size = surface->GetSizeInBytes(); 431 [[unlikely]] if (index > compute_sampler_table.Limit()) {
407 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 432 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
408 if (!cpu_addr) { 433 return &slot_samplers[NULL_SAMPLER_ID];
409 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", 434 }
410 gpu_addr); 435 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
411 return; 436 SamplerId& id = compute_sampler_ids[index];
412 } 437 [[unlikely]] if (is_new) {
413 surface->SetCpuAddr(*cpu_addr); 438 id = FindSampler(descriptor);
414 RegisterInnerCache(surface);
415 surface->MarkAsRegistered(true);
416 surface->SetMemoryMarked(true);
417 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
418 } 439 }
440 return &slot_samplers[id];
441}
419 442
420 void UnmarkMemory(TSurface surface) { 443template <class P>
421 if (!surface->IsMemoryMarked()) { 444void TextureCache<P>::SynchronizeGraphicsDescriptors() {
422 return; 445 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
423 } 446 const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
424 const std::size_t size = surface->GetSizeInBytes(); 447 const u32 tic_limit = maxwell3d.regs.tic.limit;
425 const VAddr cpu_addr = surface->GetCpuAddr(); 448 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
426 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); 449 if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
427 surface->SetMemoryMarked(false); 450 graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
428 } 451 }
452 if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
453 graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
454 }
455}
429 456
430 void Unregister(TSurface surface) { 457template <class P>
431 if (guard_render_targets && surface->IsProtected()) { 458void TextureCache<P>::SynchronizeComputeDescriptors() {
432 return; 459 const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
433 } 460 const u32 tic_limit = kepler_compute.regs.tic.limit;
434 if (!guard_render_targets && surface->IsRenderTarget()) { 461 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
435 ManageRenderTargetUnregister(surface); 462 const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
436 } 463 if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
437 UnmarkMemory(surface); 464 compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
438 if (surface->IsSyncPending()) {
439 marked_for_unregister.remove(surface);
440 surface->SetSyncPending(false);
441 }
442 UnregisterInnerCache(surface);
443 surface->MarkAsRegistered(false);
444 ReserveSurface(surface->GetSurfaceParams(), surface);
445 } 465 }
466 if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
467 compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
468 }
469}
446 470
447 TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { 471template <class P>
448 if (const auto surface = TryGetReservedSurface(params); surface) { 472void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
449 surface->SetGpuAddr(gpu_addr); 473 using namespace VideoCommon::Dirty;
450 return surface; 474 auto& flags = maxwell3d.dirty.flags;
451 } 475 if (!flags[Dirty::RenderTargets]) {
452 // No reserved surface available, create a new one and reserve it 476 return;
453 auto new_surface{CreateSurface(gpu_addr, params)};
454 return new_surface;
455 } 477 }
478 flags[Dirty::RenderTargets] = false;
456 479
457 const bool is_astc_supported; 480 // Render target control affects all render targets, so force lookups when it is dirty
481 const bool force = flags[Dirty::RenderTargetControl];
482 flags[Dirty::RenderTargetControl] = false;
458 483
459private: 484 for (size_t index = 0; index < NUM_RT; ++index) {
460 enum class RecycleStrategy : u32 { 485 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
461 Ignore = 0, 486 if (flags[Dirty::ColorBuffer0 + index] || force) {
462 Flush = 1, 487 flags[Dirty::ColorBuffer0 + index] = false;
463 BufferCopy = 3, 488 BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
464 }; 489 }
490 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
491 }
492 if (flags[Dirty::ZetaBuffer] || force) {
493 flags[Dirty::ZetaBuffer] = false;
494 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
495 }
496 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
497 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
465 498
466 enum class DeductionType : u32 { 499 for (size_t index = 0; index < NUM_RT; ++index) {
467 DeductionComplete, 500 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
468 DeductionIncomplete, 501 }
469 DeductionFailed, 502 render_targets.size = Extent2D{
503 maxwell3d.regs.render_area.width,
504 maxwell3d.regs.render_area.height,
470 }; 505 };
506}
471 507
472 struct Deduction { 508template <class P>
473 DeductionType type{DeductionType::DeductionFailed}; 509typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
474 TSurface surface{}; 510 return &slot_framebuffers[GetFramebufferId(render_targets)];
511}
475 512
476 bool Failed() const { 513template <class P>
477 return type == DeductionType::DeductionFailed; 514void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
478 } 515 std::span<ImageViewId> cached_image_view_ids,
516 std::span<const u32> indices,
517 std::span<ImageViewId> image_view_ids) {
518 ASSERT(indices.size() <= image_view_ids.size());
519 do {
520 has_deleted_images = false;
521 std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
522 return VisitImageView(table, cached_image_view_ids, index);
523 });
524 } while (has_deleted_images);
525}
479 526
480 bool Incomplete() const { 527template <class P>
481 return type == DeductionType::DeductionIncomplete; 528ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
482 } 529 std::span<ImageViewId> cached_image_view_ids,
530 u32 index) {
531 if (index > table.Limit()) {
532 LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
533 return NULL_IMAGE_VIEW_ID;
534 }
535 const auto [descriptor, is_new] = table.Read(index);
536 ImageViewId& image_view_id = cached_image_view_ids[index];
537 if (is_new) {
538 image_view_id = FindImageView(descriptor);
539 }
540 if (image_view_id != NULL_IMAGE_VIEW_ID) {
541 PrepareImageView(image_view_id, false, false);
542 }
543 return image_view_id;
544}
483 545
484 bool IsDepth() const { 546template <class P>
485 return surface->GetSurfaceParams().IsPixelFormatZeta(); 547FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
486 } 548 const auto [pair, is_new] = framebuffers.try_emplace(key);
487 }; 549 FramebufferId& framebuffer_id = pair->second;
550 if (!is_new) {
551 return framebuffer_id;
552 }
553 std::array<ImageView*, NUM_RT> color_buffers;
554 std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
555 [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
556 ImageView* const depth_buffer =
557 key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
558 framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
559 return framebuffer_id;
560}
488 561
489 /** 562template <class P>
490 * Takes care of selecting a proper strategy to deal with a texture recycle. 563void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
491 * 564 ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
492 * @param overlaps The overlapping surfaces registered in the cache. 565 if (True(image.flags & ImageFlagBits::CpuModified)) {
493 * @param params The parameters on the new surface. 566 return;
494 * @param gpu_addr The starting address of the new surface.
495 * @param untopological Indicates to the recycler that the texture has no way
496 * to match the overlaps due to topological reasons.
497 **/
498 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
499 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
500 if (Settings::IsGPULevelExtreme()) {
501 return RecycleStrategy::Flush;
502 }
503 // 3D Textures decision
504 if (params.target == SurfaceTarget::Texture3D) {
505 return RecycleStrategy::Flush;
506 }
507 for (const auto& s : overlaps) {
508 const auto& s_params = s->GetSurfaceParams();
509 if (s_params.target == SurfaceTarget::Texture3D) {
510 return RecycleStrategy::Flush;
511 }
512 }
513 // Untopological decision
514 if (untopological == MatchTopologyResult::CompressUnmatch) {
515 return RecycleStrategy::Flush;
516 }
517 if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
518 return RecycleStrategy::Flush;
519 }
520 return RecycleStrategy::Ignore;
521 }
522
523 /**
524 * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented
525 * strategies: Ignore and Flush.
526 *
527 * - Ignore: Just unregisters all the overlaps and loads the new texture.
528 * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
529 *
530 * @param overlaps The overlapping surfaces registered in the cache.
531 * @param params The parameters for the new surface.
532 * @param gpu_addr The starting address of the new surface.
533 * @param preserve_contents Indicates that the new surface should be loaded from memory or left
534 * blank.
535 * @param untopological Indicates to the recycler that the texture has no way to match the
536 * overlaps due to topological reasons.
537 **/
538 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
539 const GPUVAddr gpu_addr, const bool preserve_contents,
540 const MatchTopologyResult untopological) {
541 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
542 for (auto& surface : overlaps) {
543 Unregister(surface);
544 }
545 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
546 case RecycleStrategy::Ignore: {
547 return InitializeSurface(gpu_addr, params, do_load);
548 }
549 case RecycleStrategy::Flush: {
550 std::sort(overlaps.begin(), overlaps.end(),
551 [](const TSurface& a, const TSurface& b) -> bool {
552 return a->GetModificationTick() < b->GetModificationTick();
553 });
554 for (auto& surface : overlaps) {
555 FlushSurface(surface);
556 }
557 return InitializeSurface(gpu_addr, params, preserve_contents);
558 } 567 }
559 case RecycleStrategy::BufferCopy: { 568 image.flags |= ImageFlagBits::CpuModified;
560 auto new_surface = GetUncachedSurface(gpu_addr, params); 569 UntrackImage(image);
561 BufferCopy(overlaps[0], new_surface); 570 });
562 return {new_surface, new_surface->GetMainView()}; 571}
572
573template <class P>
574void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
575 std::vector<ImageId> images;
576 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
577 // Skip images that were not modified from the GPU
578 if (False(image.flags & ImageFlagBits::GpuModified)) {
579 return;
563 } 580 }
564 default: { 581 // Skip images that *are* modified from the CPU
565 UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); 582 // We don't want to write sensitive data from the guest
566 return InitializeSurface(gpu_addr, params, do_load); 583 if (True(image.flags & ImageFlagBits::CpuModified)) {
584 return;
567 } 585 }
586 if (image.info.num_samples > 1) {
587 LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
588 return;
568 } 589 }
590 image.flags &= ~ImageFlagBits::GpuModified;
591 images.push_back(image_id);
592 });
593 if (images.empty()) {
594 return;
595 }
596 std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
597 return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
598 });
599 for (const ImageId image_id : images) {
600 Image& image = slot_images[image_id];
601 auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
602 const auto copies = FullDownloadCopies(image.info);
603 image.DownloadMemory(map, 0, copies);
604 runtime.Finish();
605 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
569 } 606 }
607}
-
-    /**
-     * Takes a single surface and recreates it into another that may differ in
-     * format, target or width alignment.
-     *
-     * @param current_surface The registered surface in the cache which we want to convert.
-     * @param params The new surface params which we'll use to recreate the surface.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
-                                              bool is_render) {
-        const auto gpu_addr = current_surface->GetGpuAddr();
-        const auto& cr_params = current_surface->GetSurfaceParams();
-        TSurface new_surface;
-        if (cr_params.pixel_format != params.pixel_format && !is_render &&
-            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
-            SurfaceParams new_params = params;
-            new_params.pixel_format = cr_params.pixel_format;
-            new_params.type = cr_params.type;
-            new_surface = GetUncachedSurface(gpu_addr, new_params);
-        } else {
-            new_surface = GetUncachedSurface(gpu_addr, params);
-        }
-        const SurfaceParams& final_params = new_surface->GetSurfaceParams();
-        if (cr_params.type != final_params.type) {
-            if (Settings::IsGPULevelExtreme()) {
-                BufferCopy(current_surface, new_surface);
-            }
-        } else {
-            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
-            for (auto& brick : bricks) {
-                TryCopyImage(current_surface, new_surface, brick);
-            }
-        }
-        Unregister(current_surface);
-        Register(new_surface);
-        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
-        return {new_surface, new_surface->GetMainView()};
-    }
-
-    /**
-     * Takes a single surface and checks whether it is an exact match for the new surface's
-     * params. If it is, we return the main view of the registered surface. If the formats don't
-     * match, we rebuild the surface; we call this last method a `Mirage`. If the formats
-     * match but the targets don't, we create an overview View of the registered surface.
-     *
-     * @param current_surface The registered surface in the cache which we want to convert.
-     * @param params The new surface params which we want to check.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
-                                                     const SurfaceParams& params, bool is_render) {
-        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
-        const bool matches_target = current_surface->MatchTarget(params.target);
-        const auto match_check = [&]() -> std::pair<TSurface, TView> {
-            if (matches_target) {
-                return {current_surface, current_surface->GetMainView()};
-            }
-            return {current_surface, current_surface->EmplaceOverview(params)};
-        };
-        if (!is_mirage) {
-            return match_check();
-        }
-        if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
-            return match_check();
-        }
-        return RebuildSurface(current_surface, params, is_render);
-    }
-
-    /**
-     * Unlike RebuildSurface, where we know whether or not registered surfaces match the candidate
-     * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
-     * of the new surface; if they all match, we recreate a surface for them,
-     * else we return nothing.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     **/
-    std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
-                                                                    const SurfaceParams& params,
-                                                                    GPUVAddr gpu_addr) {
-        if (params.target == SurfaceTarget::Texture3D) {
-            return std::nullopt;
-        }
-        const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
-        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-
-        if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
-            LoadSurface(new_surface);
-            for (const auto& surface : overlaps) {
-                Unregister(surface);
-            }
-            Register(new_surface);
-            return {{new_surface, new_surface->GetMainView()}};
-        }
-
-        std::size_t passed_tests = 0;
-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
-            if (!mipmap_layer) {
-                continue;
-            }
-            const auto [base_layer, base_mipmap] = *mipmap_layer;
-            if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
-                continue;
-            }
-            ++passed_tests;
-
-            // Copy all mipmaps and layers
-            const u32 block_width = params.GetDefaultBlockWidth();
-            const u32 block_height = params.GetDefaultBlockHeight();
-            for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
-                const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
-                const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
-                if (width < block_width || height < block_height) {
-                    // Current APIs forbid copying small compressed textures, avoid errors
-                    break;
-                }
-                const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
-                                             src_params.depth);
-                TryCopyImage(surface, new_surface, copy_params);
-            }
-        }
-        if (passed_tests == 0) {
-            return std::nullopt;
-        }
-        if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
-            // In Accurate GPU all tests should pass, else we recycle
-            return std::nullopt;
-        }
-
-        const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
-
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-        return {{new_surface, new_surface->GetMainView()}};
-    }
+
+template <class P>
+void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
+    std::vector<ImageId> deleted_images;
+    ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
+    for (const ImageId id : deleted_images) {
+        Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image);
+        }
+        UnregisterImage(id);
+        DeleteImage(id);
+    }
+}
+
+template <class P>
+void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+                                const Tegra::Engines::Fermi2D::Surface& src,
+                                const Tegra::Engines::Fermi2D::Config& copy) {
+    const BlitImages images = GetBlitImages(dst, src);
+    const ImageId dst_id = images.dst_id;
+    const ImageId src_id = images.src_id;
+    PrepareImage(src_id, false, false);
+    PrepareImage(dst_id, true, false);
+
+    ImageBase& dst_image = slot_images[dst_id];
+    const ImageBase& src_image = slot_images[src_id];
+
+    // TODO: Deduplicate
+    const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+    const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+    const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+    const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+    const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+    const std::array src_region{
+        Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+        Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+    };
+
+    const std::optional src_base = src_image.TryFindBase(src.Address());
+    const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
+    const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
+    const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
+    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+    const std::array dst_region{
+        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
+    };
+
+    // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+    Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+    if constexpr (FRAMEBUFFER_BLITS) {
+        // OpenGL blits from framebuffers, not images
+        Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
+        runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
+                                copy.filter, copy.operation);
+    } else {
+        // Vulkan can blit images, but it lacks format reinterpretations
+        // Provide a framebuffer in case it's necessary
+        ImageView& dst_view = slot_image_views[dst_view_id];
+        ImageView& src_view = slot_image_views[src_view_id];
+        runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
+                          copy.operation);
+    }
+}
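BlitImage dispatches at compile time on the backend's capabilities. A minimal sketch of that `if constexpr` trait split, assuming a `FRAMEBUFFER_BLITS` boolean on the backend parameter struct (the trait bundles and print statements below are illustrative only):

#include <iostream>

// Illustrative trait bundles standing in for the OpenGL/Vulkan TextureCache params.
struct OpenGLParams {
    static constexpr bool FRAMEBUFFER_BLITS = true;
};
struct VulkanParams {
    static constexpr bool FRAMEBUFFER_BLITS = false;
};

template <class P>
void Blit() {
    if constexpr (P::FRAMEBUFFER_BLITS) {
        // glBlitFramebuffer-style path: blit between framebuffer attachments.
        std::cout << "framebuffer blit\n";
    } else {
        // vkCmdBlitImage-style path: blit between images directly.
        std::cout << "image blit\n";
    }
}

int main() {
    Blit<OpenGLParams>(); // prints "framebuffer blit"
    Blit<VulkanParams>(); // prints "image blit"
}

Because the branch not taken is discarded at compile time, each backend only has to provide the entry points its own path uses.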
+template <class P>
+void TextureCache<P>::InvalidateColorBuffer(size_t index) {
+    ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+    color_buffer_id = FindColorBuffer(index, false);
+    if (!color_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
+        return;
+    }
+    // When invalidating a color buffer, the old contents are no longer relevant
+    ImageView& color_buffer = slot_image_views[color_buffer_id];
+    Image& image = slot_images[color_buffer.image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;
+
+    runtime.InvalidateColorBuffer(color_buffer, index);
+}
+
+template <class P>
+void TextureCache<P>::InvalidateDepthBuffer() {
+    ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
+    depth_buffer_id = FindDepthBuffer(false);
+    if (!depth_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
+        return;
+    }
+    // When invalidating the depth buffer, the old contents are no longer relevant
+    ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;
+
+    ImageView& depth_buffer = slot_image_views[depth_buffer_id];
+    runtime.InvalidateDepthBuffer(depth_buffer);
+}
+
+template <class P>
+typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
+    // TODO: Properly implement this
+    const auto it = page_table.find(cpu_addr >> PAGE_BITS);
+    if (it == page_table.end()) {
+        return nullptr;
+    }
+    const auto& image_ids = it->second;
+    for (const ImageId image_id : image_ids) {
+        const ImageBase& image = slot_images[image_id];
+        if (image.cpu_addr != cpu_addr) {
+            continue;
+        }
+        if (image.image_view_ids.empty()) {
+            continue;
+        }
+        return &slot_image_views[image.image_view_ids.at(0)];
+    }
+    return nullptr;
+}
-
-    /**
-     * Takes care of managing 3D textures and their slices. Uses HLE methods for reconstructing
-     * the 3D textures within the GPU if possible. Falls back to LLE when it isn't possible to
-     * use any of the HLE methods.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param cpu_addr The starting address of the new surface on physical memory.
-     * @param preserve_contents Indicates whether the new surface should be loaded from memory or
-     * left blank.
-     */
-    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
-                                                               const SurfaceParams& params,
-                                                               GPUVAddr gpu_addr, VAddr cpu_addr,
-                                                               bool preserve_contents) {
-        if (params.target != SurfaceTarget::Texture3D) {
-            for (const auto& surface : overlaps) {
-                if (!surface->MatchTarget(params.target)) {
-                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
-                        if (Settings::IsGPULevelExtreme()) {
-                            return std::nullopt;
-                        }
-                        Unregister(surface);
-                        return InitializeSurface(gpu_addr, params, preserve_contents);
-                    }
-                    return std::nullopt;
-                }
-                if (surface->GetCpuAddr() != cpu_addr) {
-                    continue;
-                }
-                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
-                    return std::make_pair(surface, surface->GetMainView());
-                }
-            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-
-        if (params.num_levels > 1) {
-            // We can't handle mipmaps in 3D textures yet; better to fall back to the LLE approach
-            return std::nullopt;
-        }
-
-        if (overlaps.size() == 1) {
-            const auto& surface = overlaps[0];
-            const SurfaceParams& overlap_params = surface->GetSurfaceParams();
-            // Don't attempt to render to textures with more than one level for now
-            // The texture has to be at or past the overlap's start address if we want to render to it
-            if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
-                const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
-                const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-                if (slice < overlap_params.depth) {
-                    auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
-                    return std::make_pair(std::move(surface), std::move(view));
-                }
-            }
-        }
-
-        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-        bool modified = false;
-
-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            if (src_params.target != SurfaceTarget::Texture2D ||
-                src_params.height != params.height ||
-                src_params.block_depth != params.block_depth ||
-                src_params.block_height != params.block_height) {
-                return std::nullopt;
-            }
-            modified |= surface->IsModified();
-
-            const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
-            const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-            const u32 width = params.width;
-            const u32 height = params.height;
-            const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
-            TryCopyImage(surface, new_surface, copy_params);
-        }
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-
-        TView view = new_surface->GetMainView();
-        return std::make_pair(std::move(new_surface), std::move(view));
-    }
-
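Manage3DSurfaces maps a 2D overlap onto a slice of a 3D texture by turning the byte distance from the 3D surface's base into a Z coordinate. A simplified, self-contained sketch of that idea, assuming a plain linear layout (the real code handles block-linear layouts through GetBlockOffsetXYZ, so this is only the intuition, not the actual math):

#include <cstdint>

// For a linear layout, the slice index is just the byte offset divided by the
// size of one full slice (width * height * bytes per pixel).
constexpr std::uint32_t SliceFromOffset(std::uint32_t offset, std::uint32_t width,
                                        std::uint32_t height, std::uint32_t bytes_per_pixel) {
    return offset / (width * height * bytes_per_pixel);
}

static_assert(SliceFromOffset(0x40000, 256, 256, 4) == 1,
              "0x40000 bytes into a 256x256 RGBA8 3D texture is the second slice");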
-    /**
-     * Gets the starting address and parameters of a candidate surface and tries
-     * to find a matching surface within the cache. This is done in 3 big steps:
-     *
-     * 1. Check the 1st Level Cache in order to find an exact match; if we fail, we move to step 2.
-     *
-     * 2. Check if there are any overlaps at all; if there are none, we just load the texture from
-     *    memory, else we move to step 3.
-     *
-     * 3. Consists of figuring out the relationship between the candidate texture and the
-     *    overlaps. We divide the scenarios depending on whether there's 1 or many overlaps. If
-     *    there's many, we just try to reconstruct a new surface out of them based on the
-     *    candidate's parameters; if we fail, we recycle. When there's only 1 overlap, then we
-     *    have to check if the candidate is a view (layer/mipmap) of the overlap or if the
-     *    registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
-     *    a new surface.
-     *
-     * @param gpu_addr The starting address of the candidate surface.
-     * @param params The parameters on the candidate surface.
-     * @param preserve_contents Indicates whether the new surface should be loaded from memory or
-     * left blank.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
-                                          const SurfaceParams& params, bool preserve_contents,
-                                          bool is_render) {
-        // Step 1
-        // Check the Level 1 Cache for a fast structural match. If the candidate surface
-        // matches at a certain level, we are pretty much done.
-        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
-            TSurface& current_surface = iter->second;
-            const auto topological_result = current_surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                VectorSurface overlaps{current_surface};
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
-            }
-
-            const auto struct_result = current_surface->MatchesStructure(params);
-            if (struct_result != MatchStructureResult::None) {
-                const auto& old_params = current_surface->GetSurfaceParams();
-                const bool not_3d = params.target != SurfaceTarget::Texture3D &&
-                                    old_params.target != SurfaceTarget::Texture3D;
-                if (not_3d || current_surface->MatchTarget(params.target)) {
-                    if (struct_result == MatchStructureResult::FullMatch) {
-                        return ManageStructuralMatch(current_surface, params, is_render);
-                    } else {
-                        return RebuildSurface(current_surface, params, is_render);
-                    }
-                }
-            }
-        }
-
-        // Step 2
-        // Obtain all possible overlaps in the memory region
-        const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
-
-        // If none are found, we are done; we just create the surface and load it.
-        if (overlaps.empty()) {
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-
-        // Step 3
-        // Now we need to figure out the relationship between the texture and its overlaps.
-        // We do a topological test to ensure we can find some relationship; if it fails,
-        // immediately recycle the texture.
-        for (const auto& surface : overlaps) {
-            const auto topological_result = surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
-            }
-        }
-
-        // Manage 3D textures
-        if (params.block_depth > 0) {
-            auto surface =
-                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
-            if (surface) {
-                return *surface;
-            }
-        }
-
-        // Split cases between 1 overlap or many.
-        if (overlaps.size() == 1) {
-            TSurface current_surface = overlaps[0];
-            // First check if the surface is within the overlap. If not, it means one of
-            // two things: either the candidate surface is a supertexture of the overlap,
-            // or they don't match in any known way.
-            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
-                const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
-                if (view) {
-                    return *view;
-                }
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      MatchTopologyResult::FullMatch);
-            }
-            // Now we check if the candidate is a mipmap/layer of the overlap
-            std::optional<TView> view =
-                current_surface->EmplaceView(params, gpu_addr, candidate_size);
-            if (view) {
-                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
-                if (is_mirage) {
-                    // On a mirage view, we need to recreate the surface under this new view
-                    // and then obtain a view again.
-                    SurfaceParams new_params = current_surface->GetSurfaceParams();
-                    const u32 wh = SurfaceParams::ConvertWidth(
-                        new_params.width, new_params.pixel_format, params.pixel_format);
-                    const u32 hh = SurfaceParams::ConvertHeight(
-                        new_params.height, new_params.pixel_format, params.pixel_format);
-                    new_params.width = wh;
-                    new_params.height = hh;
-                    new_params.pixel_format = params.pixel_format;
-                    std::pair<TSurface, TView> pair =
-                        RebuildSurface(current_surface, new_params, is_render);
-                    std::optional<TView> mirage_view =
-                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
-                    if (mirage_view)
-                        return {pair.first, *mirage_view};
-                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                          MatchTopologyResult::FullMatch);
-                }
-                return {current_surface, *view};
-            }
-        } else {
-            // If there are many overlaps, odds are they are subtextures of the candidate
-            // surface. We try to construct a new surface based on the candidate parameters,
-            // using the overlaps. If a single overlap fails, this will fail.
-            std::optional<std::pair<TSurface, TView>> view =
-                TryReconstructSurface(overlaps, params, gpu_addr);
-            if (view) {
-                return *view;
-            }
-        }
-        // We failed all the tests; recycle the overlaps into a new texture.
-        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                              MatchTopologyResult::FullMatch);
-    }
-
+template <class P>
+bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
+    return !uncommitted_downloads.empty();
+}
+
+template <class P>
+bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+    return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
+
+template <class P>
+void TextureCache<P>::CommitAsyncFlushes() {
+    // This is intentionally passing the value by copy
+    committed_downloads.push(uncommitted_downloads);
+    uncommitted_downloads.clear();
+}
+
+template <class P>
+void TextureCache<P>::PopAsyncFlushes() {
+    if (committed_downloads.empty()) {
+        return;
+    }
+    const std::span<const ImageId> download_ids = committed_downloads.front();
+    if (download_ids.empty()) {
+        committed_downloads.pop();
+        return;
+    }
+    size_t total_size_bytes = 0;
+    for (const ImageId image_id : download_ids) {
+        total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+    }
+    auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
+    size_t buffer_offset = 0;
+    for (const ImageId image_id : download_ids) {
+        Image& image = slot_images[image_id];
+        const auto copies = FullDownloadCopies(image.info);
+        image.DownloadMemory(download_map, buffer_offset, copies);
+        buffer_offset += image.unswizzled_size_bytes;
+    }
+    // Wait for downloads to finish
+    runtime.Finish();
+
+    buffer_offset = 0;
+    const std::span<u8> download_span = download_map.Span();
+    for (const ImageId image_id : download_ids) {
+        const ImageBase& image = slot_images[image_id];
+        const auto copies = FullDownloadCopies(image.info);
+        const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
+        buffer_offset += image.unswizzled_size_bytes;
+    }
+    committed_downloads.pop();
+}
+
+template <class P>
+bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+    bool is_modified = false;
+    ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return false;
+        }
+        is_modified = true;
+        return true;
+    });
+    return is_modified;
+}
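The three-step lookup in GetSurface above can be hard to follow through the nested branches. A compressed, illustrative restatement of just the control flow (the Cache interface below is a stand-in, not the real class; only the cascade order mirrors the source):

#include <cstddef>
#include <cstdint>

enum class Outcome { L1Hit, FreshSurface, Reconstructed, Recycled };

template <typename Cache>
Outcome Lookup(Cache& cache, std::uint64_t cpu_addr, std::size_t candidate_size) {
    if (cache.L1Match(cpu_addr)) {
        return Outcome::L1Hit; // step 1: fast structural match
    }
    if (cache.OverlapsInRegion(cpu_addr, candidate_size) == 0) {
        return Outcome::FreshSurface; // step 2: nothing overlaps, create and load
    }
    if (cache.TryReconstruct(cpu_addr, candidate_size)) {
        return Outcome::Reconstructed; // step 3: overlaps are mipmaps/layers
    }
    return Outcome::Recycled; // step 3 fallback: flush or ignore the overlaps
}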
-    /**
-     * Gets the starting address and parameters of a candidate surface and tries to find a
-     * matching surface within the cache that's similar to it. If there are many textures
-     * or the texture found is entirely incompatible, it will fail. If no texture is found, the
-     * blit will be unsuccessful.
-     *
-     * @param gpu_addr The starting address of the candidate surface.
-     * @param params The parameters on the candidate surface.
-     **/
-    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-
-        if (!cpu_addr) {
-            Deduction result{};
-            result.type = DeductionType::DeductionFailed;
-            return result;
-        }
-
-        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
-            TSurface& current_surface = iter->second;
-            const auto topological_result = current_surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                Deduction result{};
-                result.type = DeductionType::DeductionFailed;
-                return result;
-            }
-            const auto struct_result = current_surface->MatchesStructure(params);
-            if (struct_result != MatchStructureResult::None &&
-                current_surface->MatchTarget(params.target)) {
-                Deduction result{};
-                result.type = DeductionType::DeductionComplete;
-                result.surface = current_surface;
-                return result;
-            }
-        }
-
-        const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
-
-        if (overlaps.empty()) {
-            Deduction result{};
-            result.type = DeductionType::DeductionIncomplete;
-            return result;
-        }
-
-        if (overlaps.size() > 1) {
-            Deduction result{};
-            result.type = DeductionType::DeductionFailed;
-            return result;
-        } else {
-            Deduction result{};
-            result.type = DeductionType::DeductionComplete;
-            result.surface = overlaps[0];
-            return result;
-        }
-    }
-
+template <class P>
+void TextureCache<P>::RefreshContents(Image& image) {
+    if (False(image.flags & ImageFlagBits::CpuModified)) {
+        // Only upload modified images
+        return;
+    }
+    image.flags &= ~ImageFlagBits::CpuModified;
+    TrackImage(image);
+
+    if (image.info.num_samples > 1) {
+        LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
+        return;
+    }
+    auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
+    UploadImageContents(image, map, 0);
+    runtime.InsertUploadMemoryBarrier();
+}
+
+template <class P>
+template <typename MapBuffer>
+void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
+    const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
+    const GPUVAddr gpu_addr = image.gpu_addr;
+
+    if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
+        gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+        const auto uploads = FullUploadSwizzles(image.info);
+        runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
+    } else if (True(image.flags & ImageFlagBits::Converted)) {
+        std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
+        auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+        ConvertImage(unswizzled_data, image.info, mapped_span, copies);
+        image.UploadMemory(map, buffer_offset, copies);
+    } else if (image.info.type == ImageType::Buffer) {
+        const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
+        image.UploadMemory(map, buffer_offset, copies);
+    } else {
+        const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+        image.UploadMemory(map, buffer_offset, copies);
+    }
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
+    if (!IsValidAddress(gpu_memory, config)) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    const auto [pair, is_new] = image_views.try_emplace(config);
+    ImageViewId& image_view_id = pair->second;
+    if (is_new) {
+        image_view_id = CreateImageView(config);
+    }
+    return image_view_id;
+}
+
+template <class P>
+ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
+    const ImageInfo info(config);
+    const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
+    const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
+    if (!image_id) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    ImageBase& image = slot_images[image_id];
+    const SubresourceBase base = image.TryFindBase(config.Address()).value();
+    ASSERT(base.level == 0);
+    const ImageViewInfo view_info(config, base.layer);
+    const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
+    ImageViewBase& image_view = slot_image_views[image_view_id];
+    image_view.flags |= ImageViewFlagBits::Strong;
+    image.flags |= ImageFlagBits::Strong;
+    return image_view_id;
+}
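UploadImageContents and PopAsyncFlushes both share one staging buffer across a whole batch, keyed by a running byte offset per image, so a single map and a single fence cover every transfer. A self-contained sketch of that offset bookkeeping (the types here are illustrative stand-ins):

#include <cstddef>
#include <vector>

struct PendingImage {
    std::size_t size_bytes;
};

// Assign each image its offset into the shared staging buffer and compute the
// total allocation size in one pass.
std::vector<std::size_t> AssignOffsets(const std::vector<PendingImage>& batch,
                                       std::size_t& total_size_bytes) {
    std::vector<std::size_t> offsets;
    total_size_bytes = 0;
    for (const PendingImage& image : batch) {
        offsets.push_back(total_size_bytes);
        total_size_bytes += image.size_bytes;
    }
    return offsets;
}

The offsets are replayed twice in PopAsyncFlushes: once to record the copies, and once after the single runtime.Finish() to swizzle the results back into guest memory.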
-    /**
-     * Gets a null surface based on a target texture.
-     * @param target The target of the null surface.
-     */
-    TView GetNullSurface(SurfaceTarget target) {
-        const u32 i_target = static_cast<u32>(target);
-        if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
-            return it->second->GetMainView();
-        }
-        SurfaceParams params{};
-        params.target = target;
-        params.is_tiled = false;
-        params.srgb_conversion = false;
-        params.is_layered =
-            target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
-            target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
-        params.block_width = 0;
-        params.block_height = 0;
-        params.block_depth = 0;
-        params.tile_width_spacing = 1;
-        params.width = 1;
-        params.height = 1;
-        params.depth = 1;
-        if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
-            params.depth = 6;
-        }
-        params.pitch = 4;
-        params.num_levels = 1;
-        params.emulated_levels = 1;
-        params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
-        params.type = VideoCore::Surface::SurfaceType::ColorTexture;
-        auto surface = CreateSurface(0ULL, params);
-        invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
-        surface->UploadTexture(invalid_memory);
-        surface->MarkAsModified(false, Tick());
-        invalid_cache.emplace(i_target, surface);
-        return surface->GetMainView();
-    }
-
+template <class P>
+ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                           RelaxedOptions options) {
+    if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
+        return image_id;
+    }
+    return InsertImage(info, gpu_addr, options);
+}
+
+template <class P>
+ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                   RelaxedOptions options) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        return ImageId{};
+    }
+    const bool broken_views = runtime.HasBrokenTextureViewFormats();
+    ImageId image_id;
+    const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+        if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
+            const bool strict_size = False(options & RelaxedOptions::Size) &&
+                                     True(existing_image.flags & ImageFlagBits::Strong);
+            const ImageInfo& existing = existing_image.info;
+            if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
+                existing.pitch == info.pitch &&
+                IsPitchLinearSameSize(existing, info, strict_size) &&
+                IsViewCompatible(existing.format, info.format, broken_views)) {
+                image_id = existing_image_id;
+                return true;
+            }
+        } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) {
+            image_id = existing_image_id;
+            return true;
+        }
+        return false;
+    };
+    ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
+    return image_id;
+}
-    /**
-     * Gets a source and destination starting address and parameters,
-     * and tries to deduce if they are supposed to be depth textures. If so, their
-     * parameters are modified and fixed accordingly.
-     *
-     * @param src_params The parameters of the candidate surface.
-     * @param dst_params The parameters of the destination surface.
-     * @param src_gpu_addr The starting address of the candidate surface.
-     * @param dst_gpu_addr The starting address of the destination surface.
-     **/
-    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
-                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
-        auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
-        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
-        if (deduced_src.Failed() || deduced_dst.Failed()) {
-            return;
-        }
-
-        const bool incomplete_src = deduced_src.Incomplete();
-        const bool incomplete_dst = deduced_dst.Incomplete();
-
-        if (incomplete_src && incomplete_dst) {
-            return;
-        }
-
-        const bool any_incomplete = incomplete_src || incomplete_dst;
-
-        if (!any_incomplete) {
-            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
-                return;
-            }
-        } else {
-            if (incomplete_src && !(deduced_dst.IsDepth())) {
-                return;
-            }
-
-            if (incomplete_dst && !(deduced_src.IsDepth())) {
-                return;
-            }
-        }
-
-        const auto inherit_format = [](SurfaceParams& to, TSurface from) {
-            const SurfaceParams& params = from->GetSurfaceParams();
-            to.pixel_format = params.pixel_format;
-            to.type = params.type;
-        };
-        // Now we handle the cases where one or both surfaces are depth and the other is not known
-        if (!incomplete_src) {
-            inherit_format(src_params, deduced_src.surface);
-        } else {
-            inherit_format(src_params, deduced_dst.surface);
-        }
-        if (!incomplete_dst) {
-            inherit_format(dst_params, deduced_dst.surface);
-        } else {
-            inherit_format(dst_params, deduced_src.surface);
-        }
-    }
-
+template <class P>
+ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                     RelaxedOptions options) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+    const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
+    const Image& image = slot_images[image_id];
+    // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
+    const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
+    if (is_new) {
+        it->second = slot_image_allocs.insert();
+    }
+    slot_image_allocs[it->second].images.push_back(image_id);
+    return image_id;
+}
+
+template <class P>
+ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
+    ImageInfo new_info = info;
+    const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+    const bool broken_views = runtime.HasBrokenTextureViewFormats();
+    std::vector<ImageId> overlap_ids;
+    std::vector<ImageId> left_aliased_ids;
+    std::vector<ImageId> right_aliased_ids;
+    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+        if (info.type != overlap.info.type) {
+            return;
+        }
+        if (info.type == ImageType::Linear) {
+            if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
+                // Alias linear images with the same pitch
+                left_aliased_ids.push_back(overlap_id);
+            }
+            return;
+        }
+        static constexpr bool strict_size = true;
+        const std::optional<OverlapResult> solution =
+            ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views);
+        if (solution) {
+            gpu_addr = solution->gpu_addr;
+            cpu_addr = solution->cpu_addr;
+            new_info.resources = solution->resources;
+            overlap_ids.push_back(overlap_id);
+            return;
+        }
+        static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
+        const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
+        if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) {
+            left_aliased_ids.push_back(overlap_id);
+        } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
+                                 broken_views)) {
+            right_aliased_ids.push_back(overlap_id);
+        }
+    });
+    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
+    Image& new_image = slot_images[new_image_id];
+
+    // TODO: Only upload what we need
+    RefreshContents(new_image);
+
+    for (const ImageId overlap_id : overlap_ids) {
+        Image& overlap = slot_images[overlap_id];
+        if (overlap.info.num_samples != new_image.info.num_samples) {
+            LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
+        } else {
+            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+            const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
+            runtime.CopyImage(new_image, overlap, copies);
+        }
+        if (True(overlap.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(overlap);
+        }
+        UnregisterImage(overlap_id);
+        DeleteImage(overlap_id);
+    }
+    ImageBase& new_image_base = new_image;
+    for (const ImageId aliased_id : right_aliased_ids) {
+        ImageBase& aliased = slot_images[aliased_id];
+        AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
+    }
+    for (const ImageId aliased_id : left_aliased_ids) {
+        ImageBase& aliased = slot_images[aliased_id];
+        AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
+    }
+    RegisterImage(new_image_id);
+    return new_image_id;
+}
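JoinImages sorts each overlap into one of three buckets, and those buckets drive the copies and alias links created afterwards. An illustrative summary of the outcomes (the enum below is a stand-in for the three vectors in the code above, not a type in the source):

// Stand-in names for the three vectors JoinImages fills:
enum class OverlapAction {
    Absorb,     // overlap_ids: fits inside the new image; copy it in, then delete it
    AliasLeft,  // left_aliased_ids: the overlap is a subresource of the new image
    AliasRight, // right_aliased_ids: the new image is a subresource of the overlap
};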
-    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
-                                                 bool preserve_contents) {
-        auto new_surface{GetUncachedSurface(gpu_addr, params)};
-        Register(new_surface);
-        if (preserve_contents) {
-            LoadSurface(new_surface);
-        }
-        return {new_surface, new_surface->GetMainView()};
-    }
-
+template <class P>
+typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
+    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
+    static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
+    const GPUVAddr dst_addr = dst.Address();
+    const GPUVAddr src_addr = src.Address();
+    ImageInfo dst_info(dst);
+    ImageInfo src_info(src);
+    ImageId dst_id;
+    ImageId src_id;
+    do {
+        has_deleted_images = false;
+        dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
+        src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
+        const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
+        const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
+        DeduceBlitImages(dst_info, src_info, dst_image, src_image);
+        if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
+            continue;
+        }
+        if (!dst_id) {
+            dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
+        }
+        if (!src_id) {
+            src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
+        }
+    } while (has_deleted_images);
+    return BlitImages{
+        .dst_id = dst_id,
+        .src_id = src_id,
+        .dst_format = dst_info.format,
+        .src_format = src_info.format,
+    };
+}
+
+template <class P>
+SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
+    if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
+        return NULL_SAMPLER_ID;
+    }
+    const auto [pair, is_new] = samplers.try_emplace(config);
+    if (is_new) {
+        pair->second = slot_samplers.insert(runtime, config);
+    }
+    return pair->second;
+}
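FindSampler (like FindImageView before it) relies on the try_emplace memoization idiom: insert a default-constructed slot keyed by the raw descriptor, and only build the real object on a miss. A minimal sketch with illustrative key and value types:

#include <cstdint>
#include <unordered_map>

std::unordered_map<std::uint64_t, int> sampler_ids; // descriptor -> id (stand-in types)
int next_sampler_id = 1;

int FindOrCreateSampler(std::uint64_t raw_descriptor) {
    const auto [it, is_new] = sampler_ids.try_emplace(raw_descriptor);
    if (is_new) {
        it->second = next_sampler_id++; // construct only once per unique descriptor
    }
    return it->second;
}

The single hash lookup does double duty as both the query and the insertion point, which is why the hot path pays no extra cost on a hit.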
-    void LoadSurface(const TSurface& surface) {
-        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
-        surface->LoadBuffer(gpu_memory, staging_cache);
-        surface->UploadTexture(staging_cache.GetBuffer(0));
-        surface->MarkAsModified(false, Tick());
-    }
-
-    void FlushSurface(const TSurface& surface) {
-        if (!surface->IsModified()) {
-            return;
-        }
-        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
-        surface->DownloadTexture(staging_cache.GetBuffer(0));
-        surface->FlushBuffer(gpu_memory, staging_cache);
-        surface->MarkAsModified(false, Tick());
-    }
-
-    void RegisterInnerCache(TSurface& surface) {
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        VAddr start = cpu_addr >> registry_page_bits;
-        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
-        l1_cache[cpu_addr] = surface;
-        while (start <= end) {
-            registry[start].push_back(surface);
-            start++;
-        }
-    }
-
-    void UnregisterInnerCache(TSurface& surface) {
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        VAddr start = cpu_addr >> registry_page_bits;
-        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
-        l1_cache.erase(cpu_addr);
-        while (start <= end) {
-            auto& reg{registry[start]};
-            reg.erase(std::find(reg.begin(), reg.end(), surface));
-            start++;
-        }
-    }
-
+template <class P>
+ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
+    const auto& regs = maxwell3d.regs;
+    if (index >= regs.rt_control.count) {
+        return ImageViewId{};
+    }
+    const auto& rt = regs.rt[index];
+    const GPUVAddr gpu_addr = rt.Address();
+    if (gpu_addr == 0) {
+        return ImageViewId{};
+    }
+    if (rt.format == Tegra::RenderTargetFormat::NONE) {
+        return ImageViewId{};
+    }
+    const ImageInfo info(regs, index);
+    return FindRenderTargetView(info, gpu_addr, is_clear);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
+    const auto& regs = maxwell3d.regs;
+    if (!regs.zeta_enable) {
+        return ImageViewId{};
+    }
+    const GPUVAddr gpu_addr = regs.zeta.Address();
+    if (gpu_addr == 0) {
+        return ImageViewId{};
+    }
+    const ImageInfo info(regs);
+    return FindRenderTargetView(info, gpu_addr, is_clear);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+                                                  bool is_clear) {
+    const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+    const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
+    if (!image_id) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    Image& image = slot_images[image_id];
+    const ImageViewType view_type = RenderTargetImageViewType(info);
+    SubresourceBase base;
+    if (image.info.type == ImageType::Linear) {
+        base = SubresourceBase{.level = 0, .layer = 0};
+    } else {
+        base = image.TryFindBase(gpu_addr).value();
+    }
+    const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
+    const SubresourceRange range{
+        .base = base,
+        .extent = {.levels = 1, .layers = layers},
+    };
+    return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
+}
-    VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
-        if (size == 0) {
-            return {};
-        }
-        const VAddr cpu_addr_end = cpu_addr + size;
-        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
-        VectorSurface surfaces;
-        for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
-            const auto it = registry.find(start);
-            if (it == registry.end()) {
-                continue;
-            }
-            for (auto& surface : it->second) {
-                if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
-                    continue;
-                }
-                surface->MarkAsPicked(true);
-                surfaces.push_back(surface);
-            }
-        }
-        for (auto& surface : surfaces) {
-            surface->MarkAsPicked(false);
-        }
-        return surfaces;
-    }
-
-    void ReserveSurface(const SurfaceParams& params, TSurface surface) {
-        surface_reserve[params].push_back(std::move(surface));
-    }
-
-    TSurface TryGetReservedSurface(const SurfaceParams& params) {
-        auto search{surface_reserve.find(params)};
-        if (search == surface_reserve.end()) {
-            return {};
-        }
-        for (auto& surface : search->second) {
-            if (!surface->IsRegistered()) {
-                return surface;
-            }
-        }
-        return {};
-    }
-
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 32> images;
+    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+        const auto it = page_table.find(page);
+        if (it == page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
+        for (const ImageId image_id : it->second) {
+            Image& image = slot_images[image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            if (!image.Overlaps(cpu_addr, size)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
+    Image& image = slot_images[image_id];
+    if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
+        return image_view_id;
+    }
+    const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
+    image.InsertView(info, image_view_id);
+    return image_view_id;
+}
+
+template <class P>
+void TextureCache<P>::RegisterImage(ImageId image_id) {
+    ImageBase& image = slot_images[image_id];
+    ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
+               "Trying to register an already registered image");
+    image.flags |= ImageFlagBits::Registered;
+    ForEachPage(image.cpu_addr, image.guest_size_bytes,
+                [this, image_id](u64 page) { page_table[page].push_back(image_id); });
+}
+
+template <class P>
+void TextureCache<P>::UnregisterImage(ImageId image_id) {
+    Image& image = slot_images[image_id];
+    ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
+               "Trying to unregister an image that is not registered");
+    image.flags &= ~ImageFlagBits::Registered;
+    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+        const auto page_it = page_table.find(page);
+        if (page_it == page_table.end()) {
+            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
+            return;
+        }
+        std::vector<ImageId>& image_ids = page_it->second;
+        const auto vector_it = std::ranges::find(image_ids, image_id);
+        if (vector_it == image_ids.end()) {
+            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
+            return;
+        }
+        image_ids.erase(vector_it);
+    });
+}
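Both GetSurfacesInRegion (old cache) and ForEachImageInRegion (new cache) combine two tricks: a "picked" marker so an entry spanning several pages is visited only once, and a compile-time check of the visitor's return type so `bool`-returning visitors can stop the walk early. A self-contained sketch of both, with illustrative types:

#include <type_traits>
#include <vector>

struct Item {
    bool picked = false;
    int value = 0;
};

// Visitors returning bool can stop the walk early; void visitors see every item.
template <typename Func>
void ForEachItem(std::vector<Item>& items, Func&& func) {
    using Ret = std::invoke_result_t<Func, Item&>;
    constexpr bool kBoolBreak = std::is_same_v<Ret, bool>;
    for (Item& item : items) {
        if (item.picked) {
            continue; // reachable from several pages; visit only once
        }
        item.picked = true;
        if constexpr (kBoolBreak) {
            if (func(item)) {
                break;
            }
        } else {
            func(item);
        }
    }
    for (Item& item : items) {
        item.picked = false; // clear the markers for the next query
    }
}

IsRegionGpuModified earlier in the diff is the canonical user of the bool form: it returns true from the visitor as soon as one modified image is found.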
-    /// Try to do an image copy, logging when formats are incompatible.
-    void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
-        const SurfaceParams& src_params = src->GetSurfaceParams();
-        const SurfaceParams& dst_params = dst->GetSurfaceParams();
-        if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
-            LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format,
-                      src_params.pixel_format);
-            return;
-        }
-        ImageCopy(src, dst, copy);
-    }
-
-    constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
-        return siblings_table[static_cast<std::size_t>(format)];
-    }
-
+template <class P>
+void TextureCache<P>::TrackImage(ImageBase& image) {
+    ASSERT(False(image.flags & ImageFlagBits::Tracked));
+    image.flags |= ImageFlagBits::Tracked;
+    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+}
+
+template <class P>
+void TextureCache<P>::UntrackImage(ImageBase& image) {
+    ASSERT(True(image.flags & ImageFlagBits::Tracked));
+    image.flags &= ~ImageFlagBits::Tracked;
+    rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+}
+
+template <class P>
+void TextureCache<P>::DeleteImage(ImageId image_id) {
+    ImageBase& image = slot_images[image_id];
+    const GPUVAddr gpu_addr = image.gpu_addr;
+    const auto alloc_it = image_allocs_table.find(gpu_addr);
+    if (alloc_it == image_allocs_table.end()) {
+        UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
+                        gpu_addr);
+        return;
+    }
+    const ImageAllocId alloc_id = alloc_it->second;
+    std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
+    const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
+    if (alloc_image_it == alloc_images.end()) {
+        UNREACHABLE_MSG("Trying to delete an image that does not exist");
+        return;
+    }
+    ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
+    ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
+
+    // Mark render targets as dirty
+    auto& dirty = maxwell3d.dirty.flags;
+    dirty[Dirty::RenderTargets] = true;
+    dirty[Dirty::ZetaBuffer] = true;
+    for (size_t rt = 0; rt < NUM_RT; ++rt) {
+        dirty[Dirty::ColorBuffer0 + rt] = true;
+    }
+    const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
+    for (const ImageViewId image_view_id : image_view_ids) {
+        std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
+        if (render_targets.depth_buffer_id == image_view_id) {
+            render_targets.depth_buffer_id = ImageViewId{};
+        }
+    }
+    RemoveImageViewReferences(image_view_ids);
+    RemoveFramebuffers(image_view_ids);
+
+    for (const AliasedImage& alias : image.aliased_images) {
+        ImageBase& other_image = slot_images[alias.id];
+        [[maybe_unused]] const size_t num_removed_aliases =
+            std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
+                return other_alias.id == image_id;
+            });
+        ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
+                   num_removed_aliases);
+    }
+    for (const ImageViewId image_view_id : image_view_ids) {
+        sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
+        slot_image_views.erase(image_view_id);
+    }
+    sentenced_images.Push(std::move(slot_images[image_id]));
+    slot_images.erase(image_id);
+
+    alloc_images.erase(alloc_image_it);
+    if (alloc_images.empty()) {
+        image_allocs_table.erase(alloc_it);
+    }
+    if constexpr (ENABLE_VALIDATION) {
+        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+    }
+    graphics_image_table.Invalidate();
+    compute_image_table.Invalidate();
+    has_deleted_images = true;
+}
-    /// Returns true if the shader sampler entry is compatible with the TIC texture type.
-    static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
-                                 const VideoCommon::Shader::Sampler& entry) {
-        const auto shader_type = entry.type;
-        switch (tic_type) {
-        case Tegra::Texture::TextureType::Texture1D:
-        case Tegra::Texture::TextureType::Texture1DArray:
-            return shader_type == Tegra::Shader::TextureType::Texture1D;
-        case Tegra::Texture::TextureType::Texture1DBuffer:
-            // TODO(Rodrigo): Assume as valid for now
-            return true;
-        case Tegra::Texture::TextureType::Texture2D:
-        case Tegra::Texture::TextureType::Texture2DNoMipmap:
-            return shader_type == Tegra::Shader::TextureType::Texture2D;
-        case Tegra::Texture::TextureType::Texture2DArray:
-            return shader_type == Tegra::Shader::TextureType::Texture2D ||
-                   shader_type == Tegra::Shader::TextureType::TextureCube;
-        case Tegra::Texture::TextureType::Texture3D:
-            return shader_type == Tegra::Shader::TextureType::Texture3D;
-        case Tegra::Texture::TextureType::TextureCubeArray:
-        case Tegra::Texture::TextureType::TextureCubemap:
-            if (shader_type == Tegra::Shader::TextureType::TextureCube) {
-                return true;
-            }
-            return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
-        }
-        UNREACHABLE();
-        return true;
-    }
-
-    struct FramebufferTargetInfo {
-        TSurface target;
-        TView view;
-    };
-
-    void AsyncFlushSurface(TSurface& surface) {
-        if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::list<TSurface>>();
-        }
-        uncommitted_flushes->push_back(surface);
-    }
-
-    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::MemoryManager& gpu_memory;
-
-    FormatLookupTable format_lookup_table;
-    FormatCompatibility format_compatibility;
-
-    u64 ticks{};
-
-    // Guards the cache for protection conflicts.
-    bool guard_render_targets{};
-    bool guard_samplers{};
-
-    // The siblings table is for formats that can interchange with one another
-    // without causing issues. This is only valid when a conflict occurs on a
-    // non-rendering use.
-    std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
-
-    // The internal cache is different for the texture cache: it's based on buckets
-    // of 1MB. This fits the purpose of this cache better, as textures are normally
-    // large in size.
-    static constexpr u64 registry_page_bits{20};
-    static constexpr u64 registry_page_size{1 << registry_page_bits};
-    std::unordered_map<VAddr, std::vector<TSurface>> registry;
-
+template <class P>
+void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
+    auto it = image_views.begin();
+    while (it != image_views.end()) {
+        const auto found = std::ranges::find(removed_views, it->second);
+        if (found != removed_views.end()) {
+            it = image_views.erase(it);
+        } else {
+            ++it;
+        }
+    }
+}
+
+template <class P>
+void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
+    auto it = framebuffers.begin();
+    while (it != framebuffers.end()) {
+        if (it->first.Contains(removed_views)) {
+            it = framebuffers.erase(it);
+        } else {
+            ++it;
+        }
+    }
+}
+
+template <class P>
+void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
+    image.flags |= ImageFlagBits::GpuModified;
+    image.modification_tick = ++modification_tick;
+}
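RemoveImageViewReferences and RemoveFramebuffers both use the iterator-safe erase loop: `erase()` returns the next valid iterator, so the loop advances either by erasing or by incrementing, never both. A minimal sketch with illustrative types:

#include <map>
#include <string>

void EraseMatching(std::map<int, std::string>& table, const std::string& needle) {
    auto it = table.begin();
    while (it != table.end()) {
        if (it->second == needle) {
            it = table.erase(it); // erase invalidates it; use the returned successor
        } else {
            ++it;
        }
    }
}

Incrementing an iterator after erasing through it is undefined behavior on node-based containers, which is why the two advancement paths must stay mutually exclusive.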
-    static constexpr u32 DEPTH_RT = 8;
-    static constexpr u32 NO_RT = 0xFFFFFFFF;
-
+template <class P>
+void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
+    boost::container::small_vector<const AliasedImage*, 1> aliased_images;
+    ImageBase& image = slot_images[image_id];
+    u64 most_recent_tick = image.modification_tick;
+    for (const AliasedImage& aliased : image.aliased_images) {
+        ImageBase& aliased_image = slot_images[aliased.id];
+        if (image.modification_tick < aliased_image.modification_tick) {
+            most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
+            aliased_images.push_back(&aliased);
+        }
+    }
+    if (aliased_images.empty()) {
+        return;
+    }
+    image.modification_tick = most_recent_tick;
+    std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
+        const ImageBase& lhs_image = slot_images[lhs->id];
+        const ImageBase& rhs_image = slot_images[rhs->id];
+        return lhs_image.modification_tick < rhs_image.modification_tick;
+    });
+    for (const AliasedImage* const aliased : aliased_images) {
+        CopyImage(image_id, aliased->id, aliased->copies);
+    }
+}
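SynchronizeAliases replays only the aliases that are newer than the target image, in ascending tick order, so the most recently written data lands last and wins. A self-contained sketch of that ordering rule (types are stand-ins):

#include <algorithm>
#include <cstdint>
#include <vector>

struct StaleAlias {
    std::uint64_t modification_tick;
    int image_id;
};

void ReplayOldestFirst(std::vector<StaleAlias>& stale) {
    std::sort(stale.begin(), stale.end(), [](const StaleAlias& lhs, const StaleAlias& rhs) {
        return lhs.modification_tick < rhs.modification_tick;
    });
    for (const StaleAlias& alias : stale) {
        // The real cache calls CopyImage(image_id, alias.image_id, copies) here.
        (void)alias;
    }
}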
-    // The L1 cache is used for fast texture lookup before checking the overlaps.
-    // This avoids calculating sizes and other things.
-    std::unordered_map<VAddr, TSurface> l1_cache;
-
-    /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have
-    /// previously been used. This is to prevent surfaces from being constantly created and
-    /// destroyed when used with different surface parameters.
-    std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
-    std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
-        render_targets;
-    FramebufferTargetInfo depth_buffer;
-
+template <class P>
+void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
+    Image& image = slot_images[image_id];
+    if (invalidate) {
+        image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
+        if (False(image.flags & ImageFlagBits::Tracked)) {
+            TrackImage(image);
+        }
+    } else {
+        RefreshContents(image);
+        SynchronizeAliases(image_id);
+    }
+    if (is_modification) {
+        MarkModification(image);
+    }
+    image.frame_tick = frame_tick;
+}
+
+template <class P>
+void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
+                                       bool invalidate) {
+    if (!image_view_id) {
+        return;
+    }
+    const ImageViewBase& image_view = slot_image_views[image_view_id];
+    PrepareImage(image_view.image_id, is_modification, invalidate);
+}
1287 std::vector<TSurface> sampled_textures; 1351template <class P>
1352void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1353 Image& dst = slot_images[dst_id];
1354 Image& src = slot_images[src_id];
1355 const auto dst_format_type = GetFormatType(dst.info.format);
1356 const auto src_format_type = GetFormatType(src.info.format);
1357 if (src_format_type == dst_format_type) {
1358 if constexpr (HAS_EMULATED_COPIES) {
1359 if (!runtime.CanImageBeCopied(dst, src)) {
1360 return runtime.EmulateCopyImage(dst, src, copies);
1361 }
1362 }
1363 return runtime.CopyImage(dst, src, copies);
1364 }
1365 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1366 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1367 for (const ImageCopy& copy : copies) {
1368 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1369 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1370 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1371 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1372
1373 const SubresourceBase dst_base{
1374 .level = copy.dst_subresource.base_level,
1375 .layer = copy.dst_subresource.base_layer,
1376 };
1377 const SubresourceBase src_base{
1378 .level = copy.src_subresource.base_level,
1379 .layer = copy.src_subresource.base_layer,
1380 };
1381 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1382 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1383 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1384 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1385 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1386 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1387 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1388 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1389 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1390 ImageView& dst_view = slot_image_views[dst_view_id];
1391 ImageView& src_view = slot_image_views[src_view_id];
1392 [[maybe_unused]] const Extent3D expected_size{
1393 .width = std::min(dst_view.size.width, src_view.size.width),
1394 .height = std::min(dst_view.size.height, src_view.size.height),
1395 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1396 };
1397 UNIMPLEMENTED_IF(copy.extent != expected_size);
1288 1398
1289 /// This cache stores null surfaces in order to be used as a placeholder 1399 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1290 /// for invalid texture calls. 1400 }
1291 std::unordered_map<u32, TSurface> invalid_cache; 1401}
1292 std::vector<u8> invalid_memory;
1293 1402
1294 std::list<TSurface> marked_for_unregister; 1403template <class P>
1404void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1405 if (*old_id == new_id) {
1406 return;
1407 }
1408 if (*old_id) {
1409 const ImageViewBase& old_view = slot_image_views[*old_id];
1410 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1411 uncommitted_downloads.push_back(old_view.image_id);
1412 }
1413 }
1414 *old_id = new_id;
1415}
1295 1416
1296 std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; 1417template <class P>
1297 std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; 1418std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1419 ImageId image_id, const ImageViewInfo& view_info) {
1420 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1421 const ImageBase& image = slot_images[image_id];
1422 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1423 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1424 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1425 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1426 const u32 num_samples = image.info.num_samples;
1427 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1428 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1429 .color_buffer_ids = {color_view_id},
1430 .depth_buffer_id = depth_view_id,
1431 .size = {extent.width >> samples_x, extent.height >> samples_y},
1432 });
1433 return {framebuffer_id, view_id};
1434}
1298 1435
1299 StagingCache staging_cache; 1436template <class P>
1300 std::recursive_mutex mutex; 1437bool TextureCache<P>::IsFullClear(ImageViewId id) {
1301}; 1438 if (!id) {
1439 return true;
1440 }
1441 const ImageViewBase& image_view = slot_image_views[id];
1442 const ImageBase& image = slot_images[image_view.image_id];
1443 const Extent3D size = image_view.size;
1444 const auto& regs = maxwell3d.regs;
1445 const auto& scissor = regs.scissor_test[0];
1446 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1447 // Images with multiple resources can't be cleared in a single call
1448 return false;
1449 }
1450 if (regs.clear_flags.scissor == 0) {
1451 // If scissor testing is disabled, the clear is always full
1452 return true;
1453 }
1454 // Make sure the clear covers all texels in the subresource
1455 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1456 scissor.max_y >= size.height;
1457}
1302 1458
1303} // namespace VideoCommon 1459} // namespace VideoCommon
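The sort in SynchronizeAliases above is what makes alias synchronization converge: stale aliases are replayed oldest-first, so the most recently modified alias is copied last and its contents win. A minimal, self-contained sketch of that ordering contract (FakeAlias is a hypothetical stand-in for the cache's slot types, not part of this diff):

#include <algorithm>
#include <cstdint>
#include <vector>

struct FakeAlias {
    std::uint64_t modification_tick;
    int id;
};

int main() {
    // Three stale aliases with out-of-order ticks.
    std::vector<FakeAlias> stale{{7, 1}, {3, 2}, {5, 3}};
    std::sort(stale.begin(), stale.end(), [](const FakeAlias& lhs, const FakeAlias& rhs) {
        return lhs.modification_tick < rhs.modification_tick;
    });
    // Copies are now applied in tick order (ids 2, 3, 1), so the newest
    // alias's contents form the final state of the synchronized image.
}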
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
new file mode 100644
index 000000000..2ad2d72a6
--- /dev/null
+++ b/src/video_core/texture_cache/types.h
@@ -0,0 +1,140 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9#include "video_core/texture_cache/slot_vector.h"
10
11namespace VideoCommon {
12
13constexpr size_t NUM_RT = 8;
14constexpr size_t MAX_MIP_LEVELS = 14;
15
16constexpr SlotId CORRUPT_ID{0xfffffffe};
17
18using ImageId = SlotId;
19using ImageViewId = SlotId;
20using ImageAllocId = SlotId;
21using SamplerId = SlotId;
22using FramebufferId = SlotId;
23
24enum class ImageType : u32 {
25 e1D,
26 e2D,
27 e3D,
28 Linear,
29 Buffer,
30};
31
32enum class ImageViewType : u32 {
33 e1D,
34 e2D,
35 Cube,
36 e3D,
37 e1DArray,
38 e2DArray,
39 CubeArray,
40 Rect,
41 Buffer,
42};
43constexpr size_t NUM_IMAGE_VIEW_TYPES = 9;
44
45enum class RelaxedOptions : u32 {
46 Size = 1 << 0,
47 Format = 1 << 1,
48 Samples = 1 << 2,
49};
50DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
51
52struct Offset2D {
53 constexpr auto operator<=>(const Offset2D&) const noexcept = default;
54
55 s32 x;
56 s32 y;
57};
58
59struct Offset3D {
60 constexpr auto operator<=>(const Offset3D&) const noexcept = default;
61
62 s32 x;
63 s32 y;
64 s32 z;
65};
66
67struct Extent2D {
68 constexpr auto operator<=>(const Extent2D&) const noexcept = default;
69
70 u32 width;
71 u32 height;
72};
73
74struct Extent3D {
75 constexpr auto operator<=>(const Extent3D&) const noexcept = default;
76
77 u32 width;
78 u32 height;
79 u32 depth;
80};
81
82struct SubresourceLayers {
83 s32 base_level = 0;
84 s32 base_layer = 0;
85 s32 num_layers = 1;
86};
87
88struct SubresourceBase {
89 constexpr auto operator<=>(const SubresourceBase&) const noexcept = default;
90
91 s32 level = 0;
92 s32 layer = 0;
93};
94
95struct SubresourceExtent {
96 constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default;
97
98 s32 levels = 1;
99 s32 layers = 1;
100};
101
102struct SubresourceRange {
103 constexpr auto operator<=>(const SubresourceRange&) const noexcept = default;
104
105 SubresourceBase base;
106 SubresourceExtent extent;
107};
108
109struct ImageCopy {
110 SubresourceLayers src_subresource;
111 SubresourceLayers dst_subresource;
112 Offset3D src_offset;
113 Offset3D dst_offset;
114 Extent3D extent;
115};
116
117struct BufferImageCopy {
118 size_t buffer_offset;
119 size_t buffer_size;
120 u32 buffer_row_length;
121 u32 buffer_image_height;
122 SubresourceLayers image_subresource;
123 Offset3D image_offset;
124 Extent3D image_extent;
125};
126
127struct BufferCopy {
128 size_t src_offset;
129 size_t dst_offset;
130 size_t size;
131};
132
133struct SwizzleParameters {
134 Extent3D num_tiles;
135 Extent3D block;
136 size_t buffer_offset;
137 s32 level;
138};
139
140} // namespace VideoCommon
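All of these structs default operator<=>, which also gives them a defaulted operator==; the cache relies on the resulting memberwise comparisons for checks such as copy.src_offset != Offset3D{} in CopyImage. A minimal sketch of the idiom (Offset3DExample is illustrative, not part of this diff):

#include <compare>

struct Offset3DExample {
    // The real structs use the project's s32; plain int keeps this sketch standalone.
    constexpr auto operator<=>(const Offset3DExample&) const noexcept = default;
    int x;
    int y;
    int z;
};

static_assert(Offset3DExample{1, 2, 3} == Offset3DExample{1, 2, 3});
static_assert(Offset3DExample{0, 0, 1} != Offset3DExample{0, 0, 0});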
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
new file mode 100644
index 000000000..279932778
--- /dev/null
+++ b/src/video_core/texture_cache/util.cpp
@@ -0,0 +1,1233 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file contains code from Ryujinx
6// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
7// The sections using code from Ryujinx are marked with a link to the original version
8
9// MIT License
10//
11// Copyright (c) Ryujinx Team and Contributors
12//
13// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
14// associated documentation files (the "Software"), to deal in the Software without restriction,
15// including without limitation the rights to use, copy, modify, merge, publish, distribute,
16// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
17// furnished to do so, subject to the following conditions:
18//
19// The above copyright notice and this permission notice shall be included in all copies or
20// substantial portions of the Software.
21//
22// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
23// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
25// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27//
28
29#include <algorithm>
30#include <array>
#include <bit>    // std::countl_zero (BytesPerBlockLog2); likely pulled in transitively in the original build
#include <memory> // std::make_unique (SwizzleBlockLinearImage, UnswizzleImage)
31#include <numeric>
32#include <optional>
33#include <span>
34#include <vector>
35
36#include "common/alignment.h"
37#include "common/assert.h"
38#include "common/bit_util.h"
39#include "common/common_types.h"
40#include "common/div_ceil.h"
41#include "video_core/compatible_formats.h"
42#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h"
44#include "video_core/surface.h"
45#include "video_core/texture_cache/decode_bc4.h"
46#include "video_core/texture_cache/format_lookup_table.h"
47#include "video_core/texture_cache/formatter.h"
48#include "video_core/texture_cache/samples_helper.h"
49#include "video_core/texture_cache/util.h"
50#include "video_core/textures/astc.h"
51#include "video_core/textures/decoders.h"
52
53namespace VideoCommon {
54
55namespace {
56
57using Tegra::Texture::GOB_SIZE;
58using Tegra::Texture::GOB_SIZE_SHIFT;
59using Tegra::Texture::GOB_SIZE_X;
60using Tegra::Texture::GOB_SIZE_X_SHIFT;
61using Tegra::Texture::GOB_SIZE_Y;
62using Tegra::Texture::GOB_SIZE_Y_SHIFT;
63using Tegra::Texture::GOB_SIZE_Z;
64using Tegra::Texture::GOB_SIZE_Z_SHIFT;
65using Tegra::Texture::MsaaMode;
66using Tegra::Texture::SwizzleTexture;
67using Tegra::Texture::TextureFormat;
68using Tegra::Texture::TextureType;
69using Tegra::Texture::TICEntry;
70using Tegra::Texture::UnswizzleTexture;
71using VideoCore::Surface::BytesPerBlock;
72using VideoCore::Surface::DefaultBlockHeight;
73using VideoCore::Surface::DefaultBlockWidth;
74using VideoCore::Surface::IsCopyCompatible;
75using VideoCore::Surface::IsPixelFormatASTC;
76using VideoCore::Surface::IsViewCompatible;
77using VideoCore::Surface::PixelFormatFromDepthFormat;
78using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
79using VideoCore::Surface::SurfaceType;
80
81constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
82
83struct LevelInfo {
84 Extent3D size;
85 Extent3D block;
86 Extent2D tile_size;
87 u32 bpp_log2;
88 u32 tile_width_spacing;
89};
90
91[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
92 if (shift == 0) {
93 return 0;
94 }
95 u32 x = unit_factor << (shift - 1);
96 if (x >= dimension) {
97 while (--shift) {
98 x >>= 1;
99 if (x < dimension) {
100 break;
101 }
102 }
103 }
104 return shift;
105}
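// Worked example (values assumed for illustration): AdjustTileSize(4, GOB_SIZE_Y, 24)
// returns 2. Half of a 2^4-GOB block covers 8 << 3 == 64 rows, still more than the 24
// rows available, so the shift shrinks until half a 2^2-GOB block (16 rows) fits.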
106
107[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
108 return std::max<u32>(size >> level, 1);
109}
110
111[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
112 return Extent3D{
113 .width = AdjustMipSize(size.width, level),
114 .height = AdjustMipSize(size.height, level),
115 .depth = AdjustMipSize(size.depth, level),
116 };
117}
118
119[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) {
120 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
121 return Extent3D{
122 .width = size.width >> samples_x,
123 .height = size.height >> samples_y,
124 .depth = size.depth,
125 };
126}
127
128template <u32 GOB_EXTENT>
129[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
130 do {
131 while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
132 --block_size;
133 }
134 } while (level--);
135 return block_size;
136}
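// Worked example (values assumed for illustration): with GOB_SIZE_Y == 8, a mip
// spanning 16 rows of tiles with a requested block height of 2^4 GOBs gives
// AdjustMipBlockSize<8>(16, 4, 0) == 1, because the loop shrinks the block while
// 16 <= (1U << (block_size - 1)) * 8 still holds.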
137
138[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
139 u32 level) {
140 return {
141 .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
142 .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
143 .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
144 };
145}
146
147[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
148 return {
149 .width = Common::DivCeil(size.width, tile_size.width),
150 .height = Common::DivCeil(size.height, tile_size.height),
151 .depth = size.depth,
152 };
153}
154
155[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
156 return std::countl_zero(bytes_per_block) ^ 0x1F;
157}
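// Example: for 32-bit inputs, std::countl_zero(x) ^ 0x1F equals 31 - countl_zero(x),
// i.e. the index of the highest set bit, so BytesPerBlockLog2(16) == 4.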
158
159[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
160 return BytesPerBlockLog2(BytesPerBlock(format));
161}
162
163[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
164 const Extent3D num_blocks = AdjustTileSize(size, tile_size);
165 return num_blocks.width * num_blocks.height * num_blocks.depth;
166}
167
168[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
169 return Common::DivCeil(AdjustMipSize(size, level), block_size);
170}
171
172[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
173 return config.Width() * config.Height() * BytesPerBlock(format);
174}
175
176[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
177 switch (type) {
178 case TextureType::Texture2D:
179 case TextureType::Texture2DArray:
180 case TextureType::Texture2DNoMipmap:
181 case TextureType::Texture3D:
182 case TextureType::TextureCubeArray:
183 case TextureType::TextureCubemap:
184 return true;
185 case TextureType::Texture1D:
186 case TextureType::Texture1DArray:
187 case TextureType::Texture1DBuffer:
188 return false;
189 }
190 return false;
191}
192
193[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
194 switch (type) {
195 case ImageType::e2D:
196 case ImageType::e3D:
197 case ImageType::Linear:
198 return true;
199 case ImageType::e1D:
200 case ImageType::Buffer:
201 return false;
202 }
203 UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
204}
205
206[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
207 switch (num_samples) {
208 case 1:
209 return {1, 1};
210 case 2:
211 return {2, 1};
212 case 4:
213 return {2, 2};
214 case 8:
215 return {4, 2};
216 case 16:
217 return {4, 4};
218 }
219 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
220 return {1, 1};
221}
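// Example: Samples(8) == {4, 2}, so MakeLevelInfo below scales an 8x MSAA image by
// 4x in width and 2x in height before computing its block-linear layout.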
222
223[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
224 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
225}
226
227[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
228 return Extent3D{
229 .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
230 .height = AdjustSize(info.size.height, level, info.tile_size.height),
231 .depth = AdjustMipSize(info.size.depth, level),
232 };
233}
234
235[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
236 const Extent3D blocks = NumLevelBlocks(info, level);
237 return Extent3D{
238 .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
239 .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height),
240 .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth),
241 };
242}
243
244[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
245 return Extent2D{
246 .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
247 .height = GOB_SIZE_Y_SHIFT + block_height,
248 };
249}
250
251[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
252 u32 block_depth) {
253 return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
254 num_tiles.depth < (1U << block_depth);
255}
256
257[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
258 u32 bpp_log2) {
259 if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
260 return GOB_SIZE_X_SHIFT - bpp_log2;
261 } else {
262 return gob.width;
263 }
264}
265
266[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
267 u32 tile_width_spacing) {
268 const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
269 return StrideAlignment(num_tiles, block, gob, bpp_log2);
270}
271
272[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
273 const Extent3D blocks = NumLevelBlocks(info, level);
274 const Extent2D gobs{
275 .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
276 .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT),
277 };
278 const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing);
279 const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
280 const u32 alignment = is_small ? 0 : info.tile_width_spacing;
281 return Extent2D{
282 .width = Common::AlignBits(gobs.width, alignment),
283 .height = gobs.height,
284 };
285}
286
287[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
288 const Extent3D blocks = NumLevelBlocks(info, level);
289 const Extent3D tile_shift = TileShift(info, level);
290 const Extent2D gobs = NumGobs(info, level);
291 return Extent3D{
292 .width = Common::DivCeilLog2(gobs.width, tile_shift.width),
293 .height = Common::DivCeilLog2(gobs.height, tile_shift.height),
294 .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth),
295 };
296}
297
298[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
299 const Extent3D tile_shift = TileShift(info, level);
300 const Extent3D tiles = LevelTiles(info, level);
301 const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
302 const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth;
303 return num_tiles << shift;
304}
305
306[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info,
307 u32 num_levels) {
308 ASSERT(num_levels <= MAX_MIP_LEVELS);
309 std::array<u32, MAX_MIP_LEVELS> sizes{};
310 for (u32 level = 0; level < num_levels; ++level) {
311 sizes[level] = CalculateLevelSize(info, level);
312 }
313 return sizes;
314}
315
316[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
317 u32 num_samples, u32 tile_width_spacing) {
318 const auto [samples_x, samples_y] = Samples(num_samples);
319 const u32 bytes_per_block = BytesPerBlock(format);
320 return {
321 .size =
322 {
323 .width = size.width * samples_x,
324 .height = size.height * samples_y,
325 .depth = size.depth,
326 },
327 .block = block,
328 .tile_size = DefaultBlockSize(format),
329 .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
330 .tile_width_spacing = tile_width_spacing,
331 };
332}
333
334[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
335 return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
336 info.tile_width_spacing);
337}
338
339[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
340 u32 num_samples, u32 tile_width_spacing,
341 u32 level) {
342 const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
343 u32 offset = 0;
344 for (u32 current_level = 0; current_level < level; ++current_level) {
345 offset += CalculateLevelSize(info, current_level);
346 }
347 return offset;
348}
349
350[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
351 u32 tile_size_y, u32 tile_width_spacing) {
352 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
353 if (tile_width_spacing > 0) {
354 const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
355 return Common::AlignBits(size_bytes, alignment_log2);
356 }
357 const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
358 while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
359 --block.height;
360 }
361 while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) {
362 --block.depth;
363 }
364 const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth;
365 const u32 num_blocks = size_bytes >> block_shift;
366 if (size_bytes != num_blocks << block_shift) {
367 return (num_blocks + 1) << block_shift;
368 }
369 return size_bytes;
370}
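// Worked example (values assumed, and assuming the shrink loops above leave the block
// untouched): with block {height = 1, depth = 0}, block_shift == GOB_SIZE_SHIFT + 1,
// so layers align to 1024 bytes and a 1500-byte layer rounds up to 2048.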
371
372[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
373 const ImageBase& overlap,
374 bool strict_size) {
375 const ImageInfo& info = overlap.info;
376 if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) {
377 return std::nullopt;
378 }
379 if (new_info.block != info.block) {
380 return std::nullopt;
381 }
382 const SubresourceExtent resources = new_info.resources;
383 return SubresourceExtent{
384 .levels = std::max(resources.levels, info.resources.levels),
385 .layers = std::max(resources.layers, info.resources.layers),
386 };
387}
388
389[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
390 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
391 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
392 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
393 const auto it = std::ranges::find(slice_offsets, diff);
394 if (it == slice_offsets.end()) {
395 return std::nullopt;
396 }
397 const std::vector subresources = CalculateSliceSubresources(new_info);
398 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
399 const ImageInfo& info = overlap.info;
400 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
401 return std::nullopt;
402 }
403 const u32 mip_depth = std::max(1U, new_info.size.depth << base.level);
404 if (mip_depth < info.size.depth + base.layer) {
405 return std::nullopt;
406 }
407 if (MipBlockSize(new_info, base.level) != info.block) {
408 return std::nullopt;
409 }
410 return SubresourceExtent{
411 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
412 .layers = 1,
413 };
414}
415
416[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
417 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
418 const u32 layer_stride = new_info.layer_stride;
419 const s32 new_size = layer_stride * new_info.resources.layers;
420 const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
421 if (diff > new_size) {
422 return std::nullopt;
423 }
424 const s32 base_layer = diff / layer_stride;
425 const s32 mip_offset = diff % layer_stride;
426 const std::array offsets = CalculateMipLevelOffsets(new_info);
427 const auto end = offsets.begin() + new_info.resources.levels;
428 const auto it = std::find(offsets.begin(), end, mip_offset);
429 if (it == end) {
430 // Mipmap is not aligned to any valid size
431 return std::nullopt;
432 }
433 const SubresourceBase base{
434 .level = static_cast<s32>(std::distance(offsets.begin(), it)),
435 .layer = base_layer,
436 };
437 const ImageInfo& info = overlap.info;
438 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
439 return std::nullopt;
440 }
441 if (MipBlockSize(new_info, base.level) != info.block) {
442 return std::nullopt;
443 }
444 return SubresourceExtent{
445 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
446 .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer),
447 };
448}
449
450[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
451 GPUVAddr gpu_addr,
452 VAddr cpu_addr,
453 const ImageBase& overlap,
454 bool strict_size) {
455 std::optional<SubresourceExtent> resources;
456 if (new_info.type != ImageType::e3D) {
457 resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size);
458 } else {
459 resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size);
460 }
461 if (!resources) {
462 return std::nullopt;
463 }
464 return OverlapResult{
465 .gpu_addr = gpu_addr,
466 .cpu_addr = cpu_addr,
467 .resources = *resources,
468 };
469}
470
471[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
472 GPUVAddr gpu_addr,
473 VAddr cpu_addr,
474 const ImageBase& overlap,
475 bool strict_size) {
476 const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
477 if (!base) {
478 return std::nullopt;
479 }
480 const ImageInfo& info = overlap.info;
481 if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
482 return std::nullopt;
483 }
484 if (new_info.block != MipBlockSize(info, base->level)) {
485 return std::nullopt;
486 }
487 const SubresourceExtent resources = new_info.resources;
488 s32 layers = 1;
489 if (info.type != ImageType::e3D) {
490 layers = std::max(resources.layers, info.resources.layers + base->layer);
491 }
492 return OverlapResult{
493 .gpu_addr = overlap.gpu_addr,
494 .cpu_addr = overlap.cpu_addr,
495 .resources =
496 {
497 .levels = std::max(resources.levels + base->level, info.resources.levels),
498 .layers = layers,
499 },
500 };
501}
502
503[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) {
504 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212
505 static constexpr u32 STRIDE_ALIGNMENT = 32;
506 ASSERT(info.type == ImageType::Linear);
507 const Extent2D num_tiles{
508 .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)),
509 .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)),
510 };
511 const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format);
512 return Extent2D{
513 .width = Common::AlignUp(num_tiles.width, width_alignment),
514 .height = num_tiles.height,
515 };
516}
517
518[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
519 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
520 ASSERT(info.type != ImageType::Linear);
521 const Extent3D size = AdjustMipSize(info.size, level);
522 const Extent3D num_tiles{
523 .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
524 .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
525 .depth = size.depth,
526 };
527 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
528 const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
529 const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
530 return Extent3D{
531 .width = Common::AlignBits(num_tiles.width, alignment),
532 .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
533 .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
534 };
535}
536
537[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
538 u32 num_blocks = 0;
539 for (s32 level = 0; level < info.resources.levels; ++level) {
540 const Extent3D mip_size = AdjustMipSize(info.size, level);
541 num_blocks += NumBlocks(mip_size, tile_size);
542 }
543 return num_blocks;
544}
545
546[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept {
547 ASSERT(info.type == ImageType::e3D);
548 u32 num_slices = 0;
549 for (s32 level = 0; level < info.resources.levels; ++level) {
550 num_slices += AdjustMipSize(info.size.depth, level);
551 }
552 return num_slices;
553}
554
555void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
556 const ImageInfo& info, const BufferImageCopy& copy,
557 std::span<const u8> memory) {
558 ASSERT(copy.image_offset.z == 0);
559 ASSERT(copy.image_extent.depth == 1);
560 ASSERT(copy.image_subresource.base_level == 0);
561 ASSERT(copy.image_subresource.base_layer == 0);
562 ASSERT(copy.image_subresource.num_layers == 1);
563
564 const u32 bytes_per_block = BytesPerBlock(info.format);
565 const u32 row_length = copy.image_extent.width * bytes_per_block;
566 const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;
567
568 for (u32 line = 0; line < copy.image_extent.height; ++line) {
569 const u32 host_offset_y = line * info.pitch;
570 const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
571 const u32 guest_offset = guest_offset_x + guest_offset_y;
572 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
573 row_length);
574 }
575}
576
577void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
578 const ImageInfo& info, const BufferImageCopy& copy,
579 std::span<const u8> input) {
580 const Extent3D size = info.size;
581 const LevelInfo level_info = MakeLevelInfo(info);
582 const Extent2D tile_size = DefaultBlockSize(info.format);
583 const u32 bytes_per_block = BytesPerBlock(info.format);
584
585 const s32 level = copy.image_subresource.base_level;
586 const Extent3D level_size = AdjustMipSize(size, level);
587 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
588 const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
589
590 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
591
592 UNIMPLEMENTED_IF(copy.image_offset.x != 0);
593 UNIMPLEMENTED_IF(copy.image_offset.y != 0);
594 UNIMPLEMENTED_IF(copy.image_offset.z != 0);
595 UNIMPLEMENTED_IF(copy.image_extent != level_size);
596
597 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
598 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
599
600 size_t host_offset = copy.buffer_offset;
601
602 const u32 num_levels = info.resources.levels;
603 const std::array sizes = CalculateLevelSizes(level_info, num_levels);
604 size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
605 const size_t layer_stride =
606 AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
607 level_info.block, tile_size.height, info.tile_width_spacing);
608 const size_t subresource_size = sizes[level];
609
610 const auto dst_data = std::make_unique<u8[]>(subresource_size);
611 const std::span<u8> dst(dst_data.get(), subresource_size);
612
613 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
614 const std::span<const u8> src = input.subspan(host_offset);
615 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
616 num_tiles.depth, block.height, block.depth);
617
618 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
619
620 host_offset += host_bytes_per_layer;
621 guest_offset += layer_stride;
622 }
623 ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
624}
625
626} // Anonymous namespace
627
628u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
629 if (info.type == ImageType::Buffer) {
630 return info.size.width * BytesPerBlock(info.format);
631 }
632 if (info.type == ImageType::Linear) {
633 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
634 }
635 if (info.resources.layers > 1) {
636 ASSERT(info.layer_stride != 0);
637 return info.layer_stride * info.resources.layers;
638 } else {
639 return CalculateLayerSize(info);
640 }
641}
642
643u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
644 if (info.type == ImageType::Buffer) {
645 return info.size.width * BytesPerBlock(info.format);
646 }
647 if (info.num_samples > 1) {
648 // Multisample images can't be uploaded or downloaded to the host
649 return 0;
650 }
651 if (info.type == ImageType::Linear) {
652 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
653 }
654 const Extent2D tile_size = DefaultBlockSize(info.format);
655 return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format);
656}
657
658u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
659 if (info.type == ImageType::Buffer) {
660 return info.size.width * BytesPerBlock(info.format);
661 }
662 static constexpr Extent2D TILE_SIZE{1, 1};
663 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
664}
665
666u32 CalculateLayerStride(const ImageInfo& info) noexcept {
667 ASSERT(info.type != ImageType::Linear);
668 const u32 layer_size = CalculateLayerSize(info);
669 const Extent3D size = info.size;
670 const Extent3D block = info.block;
671 const u32 tile_size_y = DefaultBlockHeight(info.format);
672 return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing);
673}
674
675u32 CalculateLayerSize(const ImageInfo& info) noexcept {
676 ASSERT(info.type != ImageType::Linear);
677 return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
678 info.tile_width_spacing, info.resources.levels);
679}
680
681std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
682 ASSERT(info.resources.levels <= MAX_MIP_LEVELS);
683 const LevelInfo level_info = MakeLevelInfo(info);
684 std::array<u32, MAX_MIP_LEVELS> offsets{};
685 u32 offset = 0;
686 for (s32 level = 0; level < info.resources.levels; ++level) {
687 offsets[level] = offset;
688 offset += CalculateLevelSize(level_info, level);
689 }
690 return offsets;
691}
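// Usage note: ResolveOverlapRightAddress2D above recovers a mip level from a byte
// offset inside a layer by running std::find over these offsets; the level is then
// std::distance(offsets.begin(), it).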
692
693std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
694 ASSERT(info.type == ImageType::e3D);
695 std::vector<u32> offsets;
696 offsets.reserve(NumSlices(info));
697
698 const LevelInfo level_info = MakeLevelInfo(info);
699 u32 mip_offset = 0;
700 for (s32 level = 0; level < info.resources.levels; ++level) {
701 const Extent3D tile_shift = TileShift(level_info, level);
702 const Extent3D tiles = LevelTiles(level_info, level);
703 const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
704 const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
705 const u32 z_mask = (1U << tile_shift.depth) - 1;
706 const u32 depth = AdjustMipSize(info.size.depth, level);
707 for (u32 slice = 0; slice < depth; ++slice) {
708 const u32 z_low = slice & z_mask;
709 const u32 z_high = slice & ~z_mask;
710 offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
711 }
712 mip_offset += CalculateLevelSize(level_info, level);
713 }
714 return offsets;
715}
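// Worked example (values assumed for illustration): with tile_shift.depth == 2,
// z_mask == 3, so slice 5 splits into z_low = 5 & 3 = 1 and z_high = 5 & ~3 = 4,
// giving an offset of mip_offset + (1 << gob_size_shift) + 4 * slice_size.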
716
717std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
718 ASSERT(info.type == ImageType::e3D);
719 std::vector<SubresourceBase> subresources;
720 subresources.reserve(NumSlices(info));
721 for (s32 level = 0; level < info.resources.levels; ++level) {
722 const s32 depth = AdjustMipSize(info.size.depth, level);
723 for (s32 slice = 0; slice < depth; ++slice) {
724 subresources.emplace_back(SubresourceBase{
725 .level = level,
726 .layer = slice,
727 });
728 }
729 }
730 return subresources;
731}
732
733u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
734 const Extent2D tile_size = DefaultBlockSize(info.format);
735 const Extent3D level_size = AdjustMipSize(info.size, level);
736 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
737 const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
738 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
739 return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
740}
741
742PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
743 return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
744 config.a_type, config.srgb_conversion);
745}
746
747ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
748 switch (info.type) {
749 case ImageType::e2D:
750 return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D;
751 case ImageType::e3D:
752 return ImageViewType::e2DArray;
753 case ImageType::Linear:
754 return ImageViewType::e2D;
755 default:
756 UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type));
757 return ImageViewType{};
758 }
759}
760
761std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
762 SubresourceBase base) {
763 ASSERT(dst.resources.levels >= src.resources.levels);
764 ASSERT(dst.num_samples == src.num_samples);
765
766 const bool is_dst_3d = dst.type == ImageType::e3D;
767 if (is_dst_3d) {
768 ASSERT(src.type == ImageType::e3D);
769 ASSERT(src.resources.levels == 1);
770 }
771
772 std::vector<ImageCopy> copies;
773 copies.reserve(src.resources.levels);
774 for (s32 level = 0; level < src.resources.levels; ++level) {
775 ImageCopy& copy = copies.emplace_back();
776 copy.src_subresource = SubresourceLayers{
777 .base_level = level,
778 .base_layer = 0,
779 .num_layers = src.resources.layers,
780 };
781 copy.dst_subresource = SubresourceLayers{
782 .base_level = base.level + level,
783 .base_layer = is_dst_3d ? 0 : base.layer,
784 .num_layers = is_dst_3d ? 1 : src.resources.layers,
785 };
786 copy.src_offset = Offset3D{
787 .x = 0,
788 .y = 0,
789 .z = 0,
790 };
791 copy.dst_offset = Offset3D{
792 .x = 0,
793 .y = 0,
794 .z = is_dst_3d ? base.layer : 0,
795 };
796 const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
797 copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
798 if (is_dst_3d) {
799 copy.extent.depth = src.size.depth;
800 }
801 }
802 return copies;
803}
804
805bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
806 if (config.Address() == 0) {
807 return false;
808 }
809 if (config.Address() > (u64(1) << 48)) {
810 return false;
811 }
812 return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
813}
814
815std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
816 const ImageInfo& info, std::span<u8> output) {
817 const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
818 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
819 const Extent3D size = info.size;
820
821 if (info.type == ImageType::Linear) {
822 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
823
824 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
825 return {{
826 .buffer_offset = 0,
827 .buffer_size = guest_size_bytes,
828 .buffer_row_length = info.pitch >> bpp_log2,
829 .buffer_image_height = size.height,
830 .image_subresource =
831 {
832 .base_level = 0,
833 .base_layer = 0,
834 .num_layers = 1,
835 },
836 .image_offset = {0, 0, 0},
837 .image_extent = size,
838 }};
839 }
840 const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
841 gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
842 const std::span<const u8> input(input_data.get(), guest_size_bytes);
843
844 const LevelInfo level_info = MakeLevelInfo(info);
845 const s32 num_layers = info.resources.layers;
846 const s32 num_levels = info.resources.levels;
847 const Extent2D tile_size = DefaultBlockSize(info.format);
848 const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
849 const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
850 const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0);
851 const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
852 info.tile_width_spacing);
853 size_t guest_offset = 0;
854 u32 host_offset = 0;
855 std::vector<BufferImageCopy> copies(num_levels);
856
857 for (s32 level = 0; level < num_levels; ++level) {
858 const Extent3D level_size = AdjustMipSize(size, level);
859 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
860 const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
861 copies[level] = BufferImageCopy{
862 .buffer_offset = host_offset,
863 .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
864 .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
865 .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
866 .image_subresource =
867 {
868 .base_level = level,
869 .base_layer = 0,
870 .num_layers = info.resources.layers,
871 },
872 .image_offset = {0, 0, 0},
873 .image_extent = level_size,
874 };
875 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
876 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
877 const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
878 size_t guest_layer_offset = 0;
879
880 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
881 const std::span<u8> dst = output.subspan(host_offset);
882 const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
883 UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
884 num_tiles.depth, block.height, block.depth, stride_alignment);
885 guest_layer_offset += layer_stride;
886 host_offset += host_bytes_per_layer;
887 }
888 guest_offset += level_sizes[level];
889 }
890 return copies;
891}
892
893BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
894 const ImageBase& image, std::span<u8> output) {
895 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
896 return BufferCopy{
897 .src_offset = 0,
898 .dst_offset = 0,
899 .size = image.guest_size_bytes,
900 };
901}
902
903void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
904 std::span<BufferImageCopy> copies) {
905 u32 output_offset = 0;
906
907 const Extent2D tile_size = DefaultBlockSize(info.format);
908 for (BufferImageCopy& copy : copies) {
909 const u32 level = copy.image_subresource.base_level;
910 const Extent3D mip_size = AdjustMipSize(info.size, level);
911 ASSERT(copy.image_offset == Offset3D{});
912 ASSERT(copy.image_subresource.base_layer == 0);
913 ASSERT(copy.image_extent == mip_size);
914 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
915 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
916
917 if (IsPixelFormatASTC(info.format)) {
918 ASSERT(copy.image_extent.depth == 1);
919 Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
920 copy.image_extent.width, copy.image_extent.height,
921 copy.image_subresource.num_layers, tile_size.width,
922 tile_size.height, output.subspan(output_offset));
923 } else {
924 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
925 output.subspan(output_offset));
926 }
927 copy.buffer_offset = output_offset;
928 copy.buffer_row_length = mip_size.width;
929 copy.buffer_image_height = mip_size.height;
930
931 output_offset += copy.image_extent.width * copy.image_extent.height *
932 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
933 }
934}
935
936std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
937 const Extent3D size = info.size;
938 const u32 bytes_per_block = BytesPerBlock(info.format);
939 if (info.type == ImageType::Linear) {
940 ASSERT(info.pitch % bytes_per_block == 0);
941 return {{
942 .buffer_offset = 0,
943 .buffer_size = static_cast<size_t>(info.pitch) * size.height,
944 .buffer_row_length = info.pitch / bytes_per_block,
945 .buffer_image_height = size.height,
946 .image_subresource =
947 {
948 .base_level = 0,
949 .base_layer = 0,
950 .num_layers = 1,
951 },
952 .image_offset = {0, 0, 0},
953 .image_extent = size,
954 }};
955 }
956 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
957
958 const s32 num_layers = info.resources.layers;
959 const s32 num_levels = info.resources.levels;
960 const Extent2D tile_size = DefaultBlockSize(info.format);
961
962 u32 host_offset = 0;
963
964 std::vector<BufferImageCopy> copies(num_levels);
965 for (s32 level = 0; level < num_levels; ++level) {
966 const Extent3D level_size = AdjustMipSize(size, level);
967 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
968 const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
969 copies[level] = BufferImageCopy{
970 .buffer_offset = host_offset,
971 .buffer_size = host_bytes_per_level,
972 .buffer_row_length = level_size.width,
973 .buffer_image_height = level_size.height,
974 .image_subresource =
975 {
976 .base_level = level,
977 .base_layer = 0,
978 .num_layers = info.resources.layers,
979 },
980 .image_offset = {0, 0, 0},
981 .image_extent = level_size,
982 };
983 host_offset += host_bytes_per_level;
984 }
985 return copies;
986}
987
988Extent3D MipSize(Extent3D size, u32 level) {
989 return AdjustMipSize(size, level);
990}
991
992Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
993 const LevelInfo level_info = MakeLevelInfo(info);
994 const Extent2D tile_size = DefaultBlockSize(info.format);
995 const Extent3D level_size = AdjustMipSize(info.size, level);
996 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
997 return AdjustMipBlockSize(num_tiles, level_info.block, level);
998}
999
1000std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1001 const Extent2D tile_size = DefaultBlockSize(info.format);
1002 if (info.type == ImageType::Linear) {
1003 return std::vector{SwizzleParameters{
1004 .num_tiles = AdjustTileSize(info.size, tile_size),
1005 .block = {},
1006 .buffer_offset = 0,
1007 .level = 0,
1008 }};
1009 }
1010 const LevelInfo level_info = MakeLevelInfo(info);
1011 const Extent3D size = info.size;
1012 const s32 num_levels = info.resources.levels;
1013
1014 u32 guest_offset = 0;
1015 std::vector<SwizzleParameters> params(num_levels);
1016 for (s32 level = 0; level < num_levels; ++level) {
1017 const Extent3D level_size = AdjustMipSize(size, level);
1018 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
1019 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
1020 params[level] = SwizzleParameters{
1021 .num_tiles = num_tiles,
1022 .block = block,
1023 .buffer_offset = guest_offset,
1024 .level = level,
1025 };
1026 guest_offset += CalculateLevelSize(level_info, level);
1027 }
1028 return params;
1029}
1030
1031void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
1032 std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
1033 const bool is_pitch_linear = info.type == ImageType::Linear;
1034 for (const BufferImageCopy& copy : copies) {
1035 if (is_pitch_linear) {
1036 SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1037 } else {
1038 SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1039 }
1040 }
1041}
1042
1043bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level,
1044 u32 rhs_level, bool strict_size) noexcept {
1045 ASSERT(lhs.type != ImageType::Linear);
1046 ASSERT(rhs.type != ImageType::Linear);
1047 if (strict_size) {
1048 const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level);
1049 const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level);
1050 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1051 } else {
1052 const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level);
1053 const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level);
1054 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1055 }
1056}
1057
1058bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
1059 ASSERT(lhs.type == ImageType::Linear);
1060 ASSERT(rhs.type == ImageType::Linear);
1061 if (strict_size) {
1062 return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height;
1063 } else {
1064 const Extent2D lhs_size = PitchLinearAlignedSize(lhs);
1065 const Extent2D rhs_size = PitchLinearAlignedSize(rhs);
1066 return lhs_size == rhs_size;
1067 }
1068}
1069
1070std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
1071 VAddr cpu_addr, const ImageBase& overlap,
1072 bool strict_size, bool broken_views) {
1073 ASSERT(new_info.type != ImageType::Linear);
1074 ASSERT(overlap.info.type != ImageType::Linear);
1075 if (!IsLayerStrideCompatible(new_info, overlap.info)) {
1076 return std::nullopt;
1077 }
1078 if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) {
1079 return std::nullopt;
1080 }
1081 if (gpu_addr == overlap.gpu_addr) {
1082 const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
1083 if (!solution) {
1084 return std::nullopt;
1085 }
1086 return OverlapResult{
1087 .gpu_addr = gpu_addr,
1088 .cpu_addr = cpu_addr,
1089 .resources = *solution,
1090 };
1091 }
1092 if (overlap.gpu_addr > gpu_addr) {
1093 return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1094 }
1095 // if overlap.gpu_addr < gpu_addr
1096 return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1097}
1098
1099bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
1100 // If either of the layer strides is zero, we can assume they are compatible
1101 // These images generally come from render targets
1102 if (lhs.layer_stride == 0) {
1103 return true;
1104 }
1105 if (rhs.layer_stride == 0) {
1106 return true;
1107 }
1108 // It's definitely compatible if the layer stride matches
1109 if (lhs.layer_stride == rhs.layer_stride) {
1110 return true;
1111 }
1112 // We also have to compare the possibly unaligned strides; this can happen
1113 // when the image has no layers, leaving the stride unaligned
1114 if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) {
1115 return true;
1116 }
1117 return false;
1118}
1119
1120std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
1121 GPUVAddr candidate_addr, RelaxedOptions options,
1122 bool broken_views) {
1123 const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
1124 if (!base) {
1125 return std::nullopt;
1126 }
1127 const ImageInfo& existing = image.info;
1128 if (False(options & RelaxedOptions::Format)) {
1129 if (!IsViewCompatible(existing.format, candidate.format, broken_views)) {
1130 return std::nullopt;
1131 }
1132 }
1133 if (!IsLayerStrideCompatible(existing, candidate)) {
1134 return std::nullopt;
1135 }
1136 if (existing.type != candidate.type) {
1137 return std::nullopt;
1138 }
1139 if (False(options & RelaxedOptions::Samples)) {
1140 if (existing.num_samples != candidate.num_samples) {
1141 return std::nullopt;
1142 }
1143 }
1144 if (existing.resources.levels < candidate.resources.levels + base->level) {
1145 return std::nullopt;
1146 }
1147 if (existing.type == ImageType::e3D) {
1148 const u32 mip_depth = std::max(1U, existing.size.depth << base->level);
1149 if (mip_depth < candidate.size.depth + base->layer) {
1150 return std::nullopt;
1151 }
1152 } else {
1153 if (existing.resources.layers < candidate.resources.layers + base->layer) {
1154 return std::nullopt;
1155 }
1156 }
1157 const bool strict_size = False(options & RelaxedOptions::Size);
1158 if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
1159 return std::nullopt;
1160 }
1161 // TODO: compare block sizes
1162 return base;
1163}
1164
1165bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
1166 RelaxedOptions options, bool broken_views) {
1167 return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value();
1168}
1169
1170void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
1171 const ImageBase* src) {
1172 if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1173 src_info.format = src->info.format;
1174 }
1175 if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1176 dst_info.format = dst->info.format;
1177 }
1178 if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1179 dst_info.format = src->info.format;
1180 }
1181 if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1182 src_info.format = dst->info.format;
1183 }
1184}
1185
1186u32 MapSizeBytes(const ImageBase& image) {
1187 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
1188 return image.guest_size_bytes;
1189 } else if (True(image.flags & ImageFlagBits::Converted)) {
1190 return image.converted_size_bytes;
1191 } else {
1192 return image.unswizzled_size_bytes;
1193 }
1194}
1195
1196using P = PixelFormat;
1197
1198static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000);
1199static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
1200
1201static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00);
1202static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) ==
1203 0x50d200);
1204
1205static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0);
1206static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000);
1207static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000);
1208static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000);
1209static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000);
1210static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000);
1211static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000);
1212static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400);
1213static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600);
1214static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800);
1215
1216constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
1217 u32 tile_width_spacing, u32 level) {
1218 const Extent3D size{width, height, 1};
1219 const Extent3D block{0, block_height, 0};
1220 const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
1221 return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
1222}
1223
1224static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800);
1225static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
1226static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
1227
1228static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
1229 "Tile width spacing is not working");
1230static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
1231 "Compressed tile width spacing is not working");
1232
1233} // namespace VideoCommon
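The A8B8G8R8 static_asserts above encode the layout invariant being tested: each
mip level is a quarter of the previous one until a level no longer fills a
512-byte GOB, after which every remaining level rounds up to one whole GOB (the
+0x200 steps from 0x555400 onward). A self-contained sketch of that simplified
model follows; it is an illustration only, since the real CalculateLevelOffset
also accounts for block height, block depth and tile width spacing.

    #include <cstdint>

    // Simplified mip-size model for the asserted 1024x1024 A8B8G8R8 offsets.
    constexpr std::uint64_t GOB_BYTES = 512;

    constexpr std::uint64_t LevelBytes(std::uint32_t level) {
        const std::uint64_t dim = std::uint64_t{1024} >> level;
        const std::uint64_t linear = dim * dim * 4;     // 4 bytes per texel
        return linear < GOB_BYTES ? GOB_BYTES : linear; // small mips round to a GOB
    }

    static_assert(LevelBytes(0) == 0x400000);
    static_assert(0x400000 + LevelBytes(1) == 0x500000); // level 2's offset
    static_assert(0x555400 + LevelBytes(7) == 0x555600); // 8x8 level -> one GOB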
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
new file mode 100644
index 000000000..52a9207d6
--- /dev/null
+++ b/src/video_core/texture_cache/util.h
@@ -0,0 +1,109 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <span>
9
10#include "common/common_types.h"
11
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/image_base.h"
15#include "video_core/texture_cache/image_view_base.h"
16#include "video_core/texture_cache/types.h"
17#include "video_core/textures/texture.h"
18
19namespace VideoCommon {
20
21using Tegra::Texture::TICEntry;
22
23struct OverlapResult {
24 GPUVAddr gpu_addr;
25 VAddr cpu_addr;
26 SubresourceExtent resources;
27};
28
29[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
30
31[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
32
33[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;
34
35[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;
36
37[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;
38
39[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
40 const ImageInfo& info) noexcept;
41
42[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
43
44[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
45
46[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
47
48[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
49 const Tegra::Texture::TICEntry& config) noexcept;
50
51[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
52
53[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
54 const ImageInfo& src,
55 SubresourceBase base);
56
57[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
58
59[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
60 GPUVAddr gpu_addr, const ImageInfo& info,
61 std::span<u8> output);
62
63[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
64 const ImageBase& image, std::span<u8> output);
65
66void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
67 std::span<BufferImageCopy> copies);
68
69[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
70
71[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
72
73[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
74
75[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
76
77void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
78 std::span<const BufferImageCopy> copies, std::span<const u8> memory);
79
80[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
81 const ImageInfo& overlap_info, u32 new_level,
82 u32 overlap_level, bool strict_size) noexcept;
83
84[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
85 bool strict_size) noexcept;
86
87[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
88 GPUVAddr gpu_addr, VAddr cpu_addr,
89 const ImageBase& overlap,
90 bool strict_size, bool broken_views);
91
92[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
93
94[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
95 const ImageBase& image,
96 GPUVAddr candidate_addr,
97 RelaxedOptions options,
98 bool broken_views);
99
100[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
101 GPUVAddr candidate_addr, RelaxedOptions options,
102 bool broken_views);
103
104void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
105 const ImageBase* src);
106
107[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);
108
109} // namespace VideoCommon
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..acd5bdd78 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
18#include <algorithm> 18#include <algorithm>
19#include <cassert> 19#include <cassert>
20#include <cstring> 20#include <cstring>
21#include <span>
21#include <vector> 22#include <vector>
22 23
23#include <boost/container/static_vector.hpp> 24#include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
600 return params; 601 return params;
601} 602}
602 603
603static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, 604static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
604 u32 blockHeight) { 605 u32 blockHeight) {
605 // Don't actually care about the void extent, just read the bits... 606 // Don't actually care about the void extent, just read the bits...
606 for (s32 i = 0; i < 4; ++i) { 607 for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
623 } 624 }
624} 625}
625 626
626static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { 627static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
627 for (u32 j = 0; j < blockHeight; j++) { 628 for (u32 j = 0; j < blockHeight; j++) {
628 for (u32 i = 0; i < blockWidth; i++) { 629 for (u32 i = 0; i < blockWidth; i++) {
629 outBuf[j * blockWidth + i] = 0xFFFF00FF; 630 outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1438#undef READ_INT_VALUES 1439#undef READ_INT_VALUES
1439} 1440}
1440 1441
1441static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, 1442static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1442 u32* outBuf) { 1443 const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1443 InputBitStream strm(inBuf); 1444 InputBitStream strm(inBuf.data());
1444 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1445 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1445 1446
1446 // Was there an error? 1447 // Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1601 } 1602 }
1602 1603
1603 // Read the texel weight data.. 1604 // Read the texel weight data..
1604 u8 texelWeightData[16]; 1605 std::array<u8, 16> texelWeightData;
1605 memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); 1606 std::ranges::copy(inBuf, texelWeightData.begin());
1606 1607
1607 // Reverse everything 1608 // Reverse everything
1608 for (u32 i = 0; i < 8; i++) { 1609 for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1618 1619
1619 // Make sure that higher non-texel bits are set to zero 1620 // Make sure that higher non-texel bits are set to zero
1620 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; 1621 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1621 texelWeightData[clearByteStart - 1] = 1622 if (clearByteStart > 0) {
1622 texelWeightData[clearByteStart - 1] & 1623 texelWeightData[clearByteStart - 1] &=
1623 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1624 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1624 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1625 }
1626 std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
1625 1627
1626 IntegerEncodedVector texelWeightValues; 1628 IntegerEncodedVector texelWeightValues;
1627 1629
1628 InputBitStream weightStream(texelWeightData); 1630 InputBitStream weightStream(texelWeightData.data());
1629 1631
1630 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, 1632 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1631 weightParams.GetNumWeightValues()); 1633 weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1672 1674
1673namespace Tegra::Texture::ASTC { 1675namespace Tegra::Texture::ASTC {
1674 1676
1675std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, 1677void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
1676 u32 block_height) { 1678 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
1677 u32 blockIdx = 0; 1679 u32 block_index = 0;
1678 std::size_t depth_offset = 0; 1680 std::size_t depth_offset = 0;
1679 std::vector<u8> outData(height * width * depth * 4); 1681 for (u32 z = 0; z < depth; z++) {
1680 for (u32 k = 0; k < depth; k++) { 1682 for (u32 y = 0; y < height; y += block_height) {
1681 for (u32 j = 0; j < height; j += block_height) { 1683 for (u32 x = 0; x < width; x += block_width) {
1682 for (u32 i = 0; i < width; i += block_width) { 1684 const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
1683
1684 const u8* blockPtr = data + blockIdx * 16;
1685 1685
1686 // Blocks can be at most 12x12 1686 // Blocks can be at most 12x12
1687 u32 uncompData[144]; 1687 std::array<u32, 12 * 12> uncompData;
1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); 1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1689 1689
1690 u32 decompWidth = std::min(block_width, width - i); 1690 u32 decompWidth = std::min(block_width, width - x);
1691 u32 decompHeight = std::min(block_height, height - j); 1691 u32 decompHeight = std::min(block_height, height - y);
1692 1692
1693 u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; 1693 const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
1694 for (u32 jj = 0; jj < decompHeight; jj++) { 1694 for (u32 jj = 0; jj < decompHeight; jj++) {
1695 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); 1695 std::memcpy(outRow.data() + jj * width * 4,
1696 uncompData.data() + jj * block_width, decompWidth * 4);
1696 } 1697 }
1697 1698 ++block_index;
1698 blockIdx++;
1699 } 1699 }
1700 } 1700 }
1701 depth_offset += height * width * 4; 1701 depth_offset += height * width * 4;
1702 } 1702 }
1703
1704 return outData;
1705} 1703}
1706 1704
1707} // namespace Tegra::Texture::ASTC 1705} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 991cdba72..9105119bc 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -5,11 +5,11 @@
5#pragma once 5#pragma once
6 6
7#include <cstdint> 7#include <cstdint>
8#include <vector> 8#include <span>
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, 12void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 13 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
14 14
15} // namespace Tegra::Texture::ASTC 15} // namespace Tegra::Texture::ASTC
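With the allocation removed from Decompress, callers now size the RGBA8 output
buffer themselves: 4 bytes per decoded texel, exactly what the deleted
std::vector overload allocated internally. A hedged usage sketch; DecodeToRgba8
is a hypothetical wrapper, not part of this patch.

    #include <cstdint>
    #include <span>
    #include <vector>

    #include "video_core/textures/astc.h"

    // Hypothetical wrapper (illustration only) for the new calling convention.
    std::vector<std::uint8_t> DecodeToRgba8(std::span<const std::uint8_t> compressed,
                                            std::uint32_t width, std::uint32_t height,
                                            std::uint32_t depth, std::uint32_t block_width,
                                            std::uint32_t block_height) {
        // The span API leaves allocation to the caller: 4 bytes per decoded texel.
        std::vector<std::uint8_t> output(std::size_t{width} * height * depth * 4);
        Tegra::Texture::ASTC::Decompress(compressed, width, height, depth, block_width,
                                         block_height, output);
        return output;
    }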
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
deleted file mode 100644
index bd1aebf02..000000000
--- a/src/video_core/textures/convert.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/surface.h"
14#include "video_core/textures/astc.h"
15#include "video_core/textures/convert.h"
16
17namespace Tegra::Texture {
18
19using VideoCore::Surface::PixelFormat;
20
21template <bool reverse>
22void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
23 union S8Z24 {
24 BitField<0, 24, u32> z24;
25 BitField<24, 8, u32> s8;
26 };
27 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
28
29 union Z24S8 {
30 BitField<0, 8, u32> s8;
31 BitField<8, 24, u32> z24;
32 };
33 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
34
35 S8Z24 s8z24_pixel{};
36 Z24S8 z24s8_pixel{};
37 constexpr auto bpp{
38 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
39 for (std::size_t y = 0; y < height; ++y) {
40 for (std::size_t x = 0; x < width; ++x) {
41 const std::size_t offset{bpp * (y * width + x)};
42 if constexpr (reverse) {
43 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
44 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
45 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
46 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
47 } else {
48 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
49 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
50 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
51 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
52 }
53 }
54 }
55}
56
57static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
58 SwapS8Z24ToZ24S8<false>(data, width, height);
59}
60
61static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
62 SwapS8Z24ToZ24S8<true>(data, width, height);
63}
64
65void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
66 u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
67 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
69 u32 block_width{};
70 u32 block_height{};
71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
72 const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
73 in_data, width, height, depth, block_width, block_height);
74 std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
75
76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
77 Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
78 }
79}
80
81void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
82 bool convert_astc, bool convert_s8z24) {
83 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
84 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
85 pixel_format);
86 UNREACHABLE();
87
88 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
89 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
90 }
91}
92
93} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
deleted file mode 100644
index d5d6c77bb..000000000
--- a/src/video_core/textures/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore::Surface {
10enum class PixelFormat;
11}
12
13namespace Tegra::Texture {
14
15void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
16 u32 width, u32 height, u32 depth, bool convert_astc,
17 bool convert_s8z24);
18
19void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
20 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
21
22} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 16d46a018..9f5181318 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -2,204 +2,111 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <cmath> 6#include <cmath>
6#include <cstring> 7#include <cstring>
8#include <span>
9#include <utility>
10
7#include "common/alignment.h" 11#include "common/alignment.h"
8#include "common/assert.h" 12#include "common/assert.h"
9#include "common/bit_util.h" 13#include "common/bit_util.h"
14#include "common/div_ceil.h"
10#include "video_core/gpu.h" 15#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 16#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 17#include "video_core/textures/texture.h"
13 18
14namespace Tegra::Texture { 19namespace Tegra::Texture {
15namespace {
16 20
21namespace {
17/** 22/**
18 * This table represents the internal swizzle of a gob, 23 * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
19 * in format 16 bytes x 2 sector packing.
20 * Calculates the offset of an (x, y) position within a swizzled texture. 24 * Calculates the offset of an (x, y) position within a swizzled texture.
21 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 25 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
22 */ 26 */
23template <std::size_t N, std::size_t M, u32 Align> 27constexpr SwizzleTable MakeSwizzleTableConst() {
24struct alignas(64) SwizzleTable { 28 SwizzleTable table{};
25 static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); 29 for (u32 y = 0; y < table.size(); ++y) {
26 constexpr SwizzleTable() { 30 for (u32 x = 0; x < table[0].size(); ++x) {
27 for (u32 y = 0; y < N; ++y) { 31 table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
28 for (u32 x = 0; x < M; ++x) { 32 (y % 2) * 16 + (x % 16);
29 const u32 x2 = x * Align;
30 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
31 ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
32 }
33 } 33 }
34 } 34 }
35 const std::array<u16, M>& operator[](std::size_t index) const { 35 return table;
36 return values[index]; 36}
37 }
38 std::array<std::array<u16, M>, N> values{};
39};
40 37
41constexpr u32 FAST_SWIZZLE_ALIGN = 16; 38constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
42 39
43constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); 40template <bool TO_LINEAR>
44constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); 41void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
42 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
43 // The origin of the transformation can be configured here; it is left at zero because the
44 // current API doesn't expose it.
45 static constexpr u32 origin_x = 0;
46 static constexpr u32 origin_y = 0;
47 static constexpr u32 origin_z = 0;
45 48
46/** 49 // A custom pitch could be configured here.
47 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 50 // As it's not exposed, 'width * bpp' is the expected pitch.
48 * Instead of going gob by gob, we map the coordinates inside a block and manage from 51 const u32 pitch = width * bytes_per_pixel;
49 * those. Block_Width is assumed to be 1. 52 const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
50 */
51void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
52 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
53 const u32 y_end, const u32 z_end, const u32 tile_offset,
54 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
55 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
56 std::array<u8*, 2> data_ptrs;
57 u32 z_address = tile_offset;
58
59 for (u32 z = z_start; z < z_end; z++) {
60 u32 y_address = z_address;
61 u32 pixel_base = layer_z * z + y_start * stride_x;
62 for (u32 y = y_start; y < y_end; y++) {
63 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
64 for (u32 x = x_start; x < x_end; x++) {
65 const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
66 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
67 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
68 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
69 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
70 }
71 pixel_base += stride_x;
72 if ((y + 1) % GOB_SIZE_Y == 0)
73 y_address += GOB_SIZE;
74 }
75 z_address += xy_block_size;
76 }
77}
78 53
79/** 54 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
80 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 55 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
81 * Instead of going gob by gob, we map the coordinates inside a block and manage from 56 const u32 slice_size =
82 * those. Block_Width is assumed to be 1. 57 Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
83 */
84void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
85 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
86 const u32 y_end, const u32 z_end, const u32 tile_offset,
87 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
88 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
89 std::array<u8*, 2> data_ptrs;
90 u32 z_address = tile_offset;
91 const u32 x_startb = x_start * bytes_per_pixel;
92 const u32 x_endb = x_end * bytes_per_pixel;
93
94 for (u32 z = z_start; z < z_end; z++) {
95 u32 y_address = z_address;
96 u32 pixel_base = layer_z * z + y_start * stride_x;
97 for (u32 y = y_start; y < y_end; y++) {
98 const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
99 for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
100 const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
101 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
102 const u32 pixel_index{out_x + pixel_base};
103 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
104 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
105 std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
106 }
107 pixel_base += stride_x;
108 if ((y + 1) % GOB_SIZE_Y == 0)
109 y_address += GOB_SIZE;
110 }
111 z_address += xy_block_size;
112 }
113}
114 58
115/** 59 const u32 block_height_mask = (1U << block_height) - 1;
116 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. 60 const u32 block_depth_mask = (1U << block_depth) - 1;
117 * The body of this function takes care of splitting the swizzled texture into blocks, 61 const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
118 * and managing the extents of it. Once all the parameters of a single block are obtained, 62
119 * the function calls 'ProcessBlock' to process that particular Block. 63 for (u32 slice = 0; slice < depth; ++slice) {
120 * 64 const u32 z = slice + origin_z;
121 * Documentation for the memory layout and decoding can be found at: 65 const u32 offset_z = (z >> block_depth) * slice_size +
122 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces 66 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
123 */ 67 for (u32 line = 0; line < height; ++line) {
124template <bool fast> 68 const u32 y = line + origin_y;
125void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, 69 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
126 const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, 70
127 const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, 71 const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
128 const u32 width_spacing) { 72 const u32 offset_y = (block_y >> block_height) * block_size +
129 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; 73 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
130 const u32 stride_x = width * out_bytes_per_pixel; 74
131 const u32 layer_z = height * stride_x; 75 for (u32 column = 0; column < width; ++column) {
132 const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; 76 const u32 x = (column + origin_x) * bytes_per_pixel;
133 constexpr u32 gob_elements_y = GOB_SIZE_Y; 77 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
134 constexpr u32 gob_elements_z = GOB_SIZE_Z; 78
135 const u32 block_x_elements = gob_elements_x; 79 const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
136 const u32 block_y_elements = gob_elements_y * block_height; 80 const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
137 const u32 block_z_elements = gob_elements_z * block_depth; 81
138 const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); 82 const u32 unswizzled_offset =
139 const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); 83 slice * pitch * height + line * pitch + column * bytes_per_pixel;
140 const u32 blocks_on_y = div_ceil(height, block_y_elements); 84
141 const u32 blocks_on_z = div_ceil(depth, block_z_elements); 85 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
142 const u32 xy_block_size = GOB_SIZE * block_height; 86 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
143 const u32 block_size = xy_block_size * block_depth; 87 std::memcpy(dst, src, bytes_per_pixel);
144 u32 tile_offset = 0;
145 for (u32 zb = 0; zb < blocks_on_z; zb++) {
146 const u32 z_start = zb * block_z_elements;
147 const u32 z_end = std::min(depth, z_start + block_z_elements);
148 for (u32 yb = 0; yb < blocks_on_y; yb++) {
149 const u32 y_start = yb * block_y_elements;
150 const u32 y_end = std::min(height, y_start + block_y_elements);
151 for (u32 xb = 0; xb < blocks_on_x; xb++) {
152 const u32 x_start = xb * block_x_elements;
153 const u32 x_end = std::min(width, x_start + block_x_elements);
154 if constexpr (fast) {
155 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
156 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
157 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
158 } else {
159 PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
160 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
161 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
162 }
163 tile_offset += block_size;
164 } 88 }
165 } 89 }
166 } 90 }
167} 91}
168
169} // Anonymous namespace 92} // Anonymous namespace
170 93
171void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, 94SwizzleTable MakeSwizzleTable() {
172 u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, 95 return SWIZZLE_TABLE;
173 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
174 const u32 block_height_size{1U << block_height};
175 const u32 block_depth_size{1U << block_depth};
176 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
177 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
178 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
179 block_depth_size, width_spacing);
180 } else {
181 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
182 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
183 block_depth_size, width_spacing);
184 }
185} 96}
186 97
187void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 98void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
188 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 99 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
189 u32 block_depth, u32 width_spacing) { 100 u32 stride_alignment) {
190 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 101 Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
191 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 102 stride_alignment);
192 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
193 width_spacing);
194} 103}
195 104
196std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 105void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
197 u32 width, u32 height, u32 depth, u32 block_height, 106 u32 height, u32 depth, u32 block_height, u32 block_depth,
198 u32 block_depth, u32 width_spacing) { 107 u32 stride_alignment) {
199 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 108 Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
200 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 109 stride_alignment);
201 width, height, depth, block_height, block_depth, width_spacing);
202 return unswizzled_data;
203} 110}
204 111
205void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 112void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
@@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
213 const u32 gob_address_y = 120 const u32 gob_address_y =
214 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 121 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
215 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 122 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
216 const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; 123 const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
217 for (u32 x = 0; x < subrect_width; ++x) { 124 for (u32 x = 0; x < subrect_width; ++x) {
218 const u32 dst_x = x + offset_x; 125 const u32 dst_x = x + offset_x;
219 const u32 gob_address = 126 const u32 gob_address =
@@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
235 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); 142 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
236 143
237 const u32 block_height_mask = (1U << block_height) - 1; 144 const u32 block_height_mask = (1U << block_height) - 1;
238 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; 145 const u32 x_shift = GOB_SIZE_SHIFT + block_height;
239 146
240 for (u32 line = 0; line < line_count; ++line) { 147 for (u32 line = 0; line < line_count; ++line) {
241 const u32 src_y = line + origin_y; 148 const u32 src_y = line + origin_y;
242 const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; 149 const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
243 150
244 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; 151 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
245 const u32 src_offset_y = (block_y >> block_height) * block_size + 152 const u32 src_offset_y = (block_y >> block_height) * block_size +
@@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
270 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; 177 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
271 178
272 for (u32 line = 0; line < line_count; ++line) { 179 for (u32 line = 0; line < line_count; ++line) {
273 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; 180 const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
274 const u32 block_y = line / GOB_SIZE_Y; 181 const u32 block_y = line / GOB_SIZE_Y;
275 const u32 dst_offset_y = 182 const u32 dst_offset_y =
276 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; 183 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
@@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
293 const std::size_t gob_address_y = 200 const std::size_t gob_address_y =
294 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 201 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
295 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 202 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
296 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; 203 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
297 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { 204 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
298 const std::size_t gob_address = 205 const std::size_t gob_address =
299 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; 206 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
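The single SWIZZLE_TABLE above replaces the legacy and fast tables with the
plain GOB byte-offset formula from the TRM. A self-contained restatement with a
few spot checks; the expected values are worked out by hand from the expression
itself, not taken from hardware.

    #include <cstdint>

    using u32 = std::uint32_t;

    // Same expression MakeSwizzleTableConst() tabulates per (x, y) within a GOB.
    constexpr u32 GobOffset(u32 x, u32 y) {
        return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
               (y % 2) * 16 + (x % 16);
    }

    static_assert(GobOffset(0, 0) == 0);
    static_assert(GobOffset(16, 0) == 32);  // next 16-byte sector, same half
    static_assert(GobOffset(0, 1) == 16);   // odd rows interleave inside a sector pair
    static_assert(GobOffset(32, 0) == 256); // right half of the 64-byte-wide GOB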
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 01e156bc8..d7cdc81e8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -4,7 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector> 7#include <span>
8
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
@@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;
15constexpr u32 GOB_SIZE_Z = 1; 16constexpr u32 GOB_SIZE_Z = 1;
16constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; 17constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
17 18
18constexpr std::size_t GOB_SIZE_X_SHIFT = 6; 19constexpr u32 GOB_SIZE_X_SHIFT = 6;
19constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; 20constexpr u32 GOB_SIZE_Y_SHIFT = 3;
20constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; 21constexpr u32 GOB_SIZE_Z_SHIFT = 0;
21constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; 22constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
22 23
23/// Unswizzles a swizzled texture without changing its format. 24using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
24void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 25
25 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 26/// Returns a z-order swizzle table
26 u32 block_height = TICEntry::DefaultBlockHeight, 27SwizzleTable MakeSwizzleTable();
27 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 28
28 29/// Unswizzles a block linear texture into linear memory.
29/// Unswizzles a swizzled texture without changing its format. 30void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
30std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 31 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
31 u32 width, u32 height, u32 depth, 32 u32 stride_alignment = 1);
32 u32 block_height = TICEntry::DefaultBlockHeight, 33
33 u32 block_depth = TICEntry::DefaultBlockHeight, 34/// Swizzles linear memory into a block linear texture.
34 u32 width_spacing = 0); 35void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
35 36 u32 height, u32 depth, u32 block_height, u32 block_depth,
36/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. 37 u32 stride_alignment = 1);
37void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
38 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
39 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
40 38
41/// This function calculates the correct size of a texture depending if it's tiled or not. 39/// This function calculates the correct size of a texture depending if it's tiled or not.
42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 40std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
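A hedged round-trip sketch of the span-based API declared above. It assumes the
trailing parameters of CalculateSize (cut off in this hunk) are block_height and
block_depth; swizzling and then unswizzling with matching parameters should
reproduce the linear input.

    #include <span>
    #include <vector>

    #include "video_core/textures/decoders.h"

    using namespace Tegra::Texture;

    void RoundTrip(std::span<const u8> linear, u32 bpp, u32 width, u32 height,
                   u32 depth, u32 block_height, u32 block_depth) {
        // Block linear buffers are GOB-granular, so size via CalculateSize.
        std::vector<u8> tiled(CalculateSize(true, bpp, width, height, depth,
                                            block_height, block_depth));
        std::vector<u8> back(linear.size());
        SwizzleTexture(tiled, linear, bpp, width, height, depth, block_height, block_depth);
        UnswizzleTexture(back, tiled, bpp, width, height, depth, block_height, block_depth);
        // `back` now matches `linear` over the covered extent.
    }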
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4171e3ef2..ae5621a7d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -5,9 +5,13 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7 7
8#include "common/cityhash.h"
8#include "core/settings.h" 9#include "core/settings.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
12using Tegra::Texture::TICEntry;
13using Tegra::Texture::TSCEntry;
14
11namespace Tegra::Texture { 15namespace Tegra::Texture {
12 16
13namespace { 17namespace {
@@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {
65 69
66} // Anonymous namespace 70} // Anonymous namespace
67 71
68std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { 72std::array<float, 4> TSCEntry::BorderColor() const noexcept {
69 if (!srgb_conversion) { 73 if (!srgb_conversion) {
70 return border_color; 74 return border_color;
71 } 75 }
@@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
73 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; 77 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
74} 78}
75 79
76float TSCEntry::GetMaxAnisotropy() const noexcept { 80float TSCEntry::MaxAnisotropy() const noexcept {
77 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); 81 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
78} 82}
79 83
80} // namespace Tegra::Texture 84} // namespace Tegra::Texture
85
86size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
87 return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic);
88}
89
90size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
91 return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc);
92}
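With operator== on the raw arrays and these CityHash64-backed std::hash
specializations, the 32-byte descriptors can key standard containers directly.
A minimal sketch; ImageId is a placeholder value type, not part of this patch.

    #include <cstdint>
    #include <unordered_map>

    #include "video_core/textures/texture.h"

    using ImageId = std::uint32_t; // placeholder for illustration

    std::unordered_map<Tegra::Texture::TICEntry, ImageId> tic_lookup;
    std::unordered_map<Tegra::Texture::TSCEntry, ImageId> tsc_lookup;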
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index bbc7e3eaf..c1d14335e 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -53,27 +53,27 @@ enum class TextureFormat : u32 {
53 BC4 = 0x27, 53 BC4 = 0x27,
54 BC5 = 0x28, 54 BC5 = 0x28,
55 S8D24 = 0x29, 55 S8D24 = 0x29,
56 X8Z24 = 0x2a, 56 X8D24 = 0x2a,
57 D24S8 = 0x2b, 57 D24S8 = 0x2b,
58 X4V4Z24__COV4R4V = 0x2c, 58 X4V4D24__COV4R4V = 0x2c,
59 X4V4Z24__COV8R8V = 0x2d, 59 X4V4D24__COV8R8V = 0x2d,
60 V8Z24__COV4R12V = 0x2e, 60 V8D24__COV4R12V = 0x2e,
61 D32 = 0x2f, 61 D32 = 0x2f,
62 D32S8 = 0x30, 62 D32S8 = 0x30,
63 X8Z24_X20V4S8__COV4R4V = 0x31, 63 X8D24_X20V4S8__COV4R4V = 0x31,
64 X8Z24_X20V4S8__COV8R8V = 0x32, 64 X8D24_X20V4S8__COV8R8V = 0x32,
65 ZF32_X20V4X8__COV4R4V = 0x33, 65 D32_X20V4X8__COV4R4V = 0x33,
66 ZF32_X20V4X8__COV8R8V = 0x34, 66 D32_X20V4X8__COV8R8V = 0x34,
67 ZF32_X20V4S8__COV4R4V = 0x35, 67 D32_X20V4S8__COV4R4V = 0x35,
68 ZF32_X20V4S8__COV8R8V = 0x36, 68 D32_X20V4S8__COV8R8V = 0x36,
69 X8Z24_X16V8S8__COV4R12V = 0x37, 69 X8D24_X16V8S8__COV4R12V = 0x37,
70 ZF32_X16V8X8__COV4R12V = 0x38, 70 D32_X16V8X8__COV4R12V = 0x38,
71 ZF32_X16V8S8__COV4R12V = 0x39, 71 D32_X16V8S8__COV4R12V = 0x39,
72 D16 = 0x3a, 72 D16 = 0x3a,
73 V8Z24__COV8R24V = 0x3b, 73 V8D24__COV8R24V = 0x3b,
74 X8Z24_X16V8S8__COV8R24V = 0x3c, 74 X8D24_X16V8S8__COV8R24V = 0x3c,
75 ZF32_X16V8X8__COV8R24V = 0x3d, 75 D32_X16V8X8__COV8R24V = 0x3d,
76 ZF32_X16V8S8__COV8R24V = 0x3e, 76 D32_X16V8S8__COV8R24V = 0x3e,
77 ASTC_2D_4X4 = 0x40, 77 ASTC_2D_4X4 = 0x40,
78 ASTC_2D_5X5 = 0x41, 78 ASTC_2D_5X5 = 0x41,
79 ASTC_2D_6X6 = 0x42, 79 ASTC_2D_6X6 = 0x42,
@@ -146,7 +146,7 @@ enum class MsaaMode : u32 {
146}; 146};
147 147
148union TextureHandle { 148union TextureHandle {
149 /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} 149 /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}
150 150
151 u32 raw; 151 u32 raw;
152 BitField<0, 20, u32> tic_id; 152 BitField<0, 20, u32> tic_id;
@@ -155,124 +155,124 @@ union TextureHandle {
155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); 155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
156 156
157struct TICEntry { 157struct TICEntry {
158 static constexpr u32 DefaultBlockHeight = 16;
159 static constexpr u32 DefaultBlockDepth = 1;
160
161 union {
162 u32 raw;
163 BitField<0, 7, TextureFormat> format;
164 BitField<7, 3, ComponentType> r_type;
165 BitField<10, 3, ComponentType> g_type;
166 BitField<13, 3, ComponentType> b_type;
167 BitField<16, 3, ComponentType> a_type;
168
169 BitField<19, 3, SwizzleSource> x_source;
170 BitField<22, 3, SwizzleSource> y_source;
171 BitField<25, 3, SwizzleSource> z_source;
172 BitField<28, 3, SwizzleSource> w_source;
173 };
174 u32 address_low;
175 union { 158 union {
176 BitField<0, 16, u32> address_high; 159 struct {
177 BitField<21, 3, TICHeaderVersion> header_version; 160 union {
178 }; 161 BitField<0, 7, TextureFormat> format;
179 union { 162 BitField<7, 3, ComponentType> r_type;
180 BitField<0, 3, u32> block_width; 163 BitField<10, 3, ComponentType> g_type;
181 BitField<3, 3, u32> block_height; 164 BitField<13, 3, ComponentType> b_type;
182 BitField<6, 3, u32> block_depth; 165 BitField<16, 3, ComponentType> a_type;
166
167 BitField<19, 3, SwizzleSource> x_source;
168 BitField<22, 3, SwizzleSource> y_source;
169 BitField<25, 3, SwizzleSource> z_source;
170 BitField<28, 3, SwizzleSource> w_source;
171 };
172 u32 address_low;
173 union {
174 BitField<0, 16, u32> address_high;
175 BitField<16, 5, u32> layer_base_3_7;
176 BitField<21, 3, TICHeaderVersion> header_version;
177 BitField<24, 1, u32> load_store_hint;
178 BitField<25, 4, u32> view_coherency_hash;
179 BitField<29, 3, u32> layer_base_8_10;
180 };
181 union {
182 BitField<0, 3, u32> block_width;
183 BitField<3, 3, u32> block_height;
184 BitField<6, 3, u32> block_depth;
183 185
184 BitField<10, 3, u32> tile_width_spacing; 186 BitField<10, 3, u32> tile_width_spacing;
185 187
186 // High 16 bits of the pitch value 188 // High 16 bits of the pitch value
187 BitField<0, 16, u32> pitch_high; 189 BitField<0, 16, u32> pitch_high;
188 BitField<26, 1, u32> use_header_opt_control; 190 BitField<26, 1, u32> use_header_opt_control;
189 BitField<27, 1, u32> depth_texture; 191 BitField<27, 1, u32> depth_texture;
190 BitField<28, 4, u32> max_mip_level; 192 BitField<28, 4, u32> max_mip_level;
191 193
192 BitField<0, 16, u32> buffer_high_width_minus_one; 194 BitField<0, 16, u32> buffer_high_width_minus_one;
193 }; 195 };
194 union { 196 union {
195 BitField<0, 16, u32> width_minus_1; 197 BitField<0, 16, u32> width_minus_one;
196 BitField<22, 1, u32> srgb_conversion; 198 BitField<16, 3, u32> layer_base_0_2;
197 BitField<23, 4, TextureType> texture_type; 199 BitField<22, 1, u32> srgb_conversion;
198 BitField<29, 3, u32> border_size; 200 BitField<23, 4, TextureType> texture_type;
201 BitField<29, 3, u32> border_size;
199 202
200 BitField<0, 16, u32> buffer_low_width_minus_one; 203 BitField<0, 16, u32> buffer_low_width_minus_one;
201 }; 204 };
202 union { 205 union {
203 BitField<0, 16, u32> height_minus_1; 206 BitField<0, 16, u32> height_minus_1;
204 BitField<16, 14, u32> depth_minus_1; 207 BitField<16, 14, u32> depth_minus_1;
205 }; 208 BitField<30, 1, u32> is_sparse;
206 union { 209 BitField<31, 1, u32> normalized_coords;
207 BitField<6, 13, u32> mip_lod_bias; 210 };
208 BitField<27, 3, u32> max_anisotropy; 211 union {
212 BitField<6, 13, u32> mip_lod_bias;
213 BitField<27, 3, u32> max_anisotropy;
214 };
215 union {
216 BitField<0, 4, u32> res_min_mip_level;
217 BitField<4, 4, u32> res_max_mip_level;
218 BitField<8, 4, MsaaMode> msaa_mode;
219 BitField<12, 12, u32> min_lod_clamp;
220 };
221 };
222 std::array<u64, 4> raw;
209 }; 223 };
210 224
211 union { 225 constexpr bool operator==(const TICEntry& rhs) const noexcept {
212 BitField<0, 4, u32> res_min_mip_level; 226 return raw == rhs.raw;
213 BitField<4, 4, u32> res_max_mip_level; 227 }
214 BitField<8, 4, MsaaMode> msaa_mode;
215 BitField<12, 12, u32> min_lod_clamp;
216 };
217 228
218 GPUVAddr Address() const { 229 constexpr bool operator!=(const TICEntry& rhs) const noexcept {
230 return raw != rhs.raw;
231 }
232
233 constexpr GPUVAddr Address() const {
219 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); 234 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
220 } 235 }
221 236
222 u32 Pitch() const { 237 constexpr u32 Pitch() const {
223 ASSERT(header_version == TICHeaderVersion::Pitch || 238 ASSERT(header_version == TICHeaderVersion::Pitch ||
224 header_version == TICHeaderVersion::PitchColorKey); 239 header_version == TICHeaderVersion::PitchColorKey);
225 // The pitch value is 21 bits, and is 32B aligned. 240 // The pitch value is 21 bits, and is 32B aligned.
226 return pitch_high << 5; 241 return pitch_high << 5;
227 } 242 }
228 243
229 u32 Width() const { 244 constexpr u32 Width() const {
230 if (header_version != TICHeaderVersion::OneDBuffer) { 245 if (header_version != TICHeaderVersion::OneDBuffer) {
231 return width_minus_1 + 1; 246 return width_minus_one + 1;
232 } 247 }
233 return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; 248 return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
234 } 249 }
235 250
236 u32 Height() const { 251 constexpr u32 Height() const {
237 return height_minus_1 + 1; 252 return height_minus_1 + 1;
238 } 253 }
239 254
240 u32 Depth() const { 255 constexpr u32 Depth() const {
241 return depth_minus_1 + 1; 256 return depth_minus_1 + 1;
242 } 257 }
243 258
244 u32 BlockWidth() const { 259 constexpr u32 BaseLayer() const {
245 ASSERT(IsTiled()); 260 return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
246 return block_width;
247 }
248
249 u32 BlockHeight() const {
250 ASSERT(IsTiled());
251 return block_height;
252 }
253
254 u32 BlockDepth() const {
255 ASSERT(IsTiled());
256 return block_depth;
257 } 261 }
258 262
259 bool IsTiled() const { 263 constexpr bool IsBlockLinear() const {
260 return header_version == TICHeaderVersion::BlockLinear || 264 return header_version == TICHeaderVersion::BlockLinear ||
261 header_version == TICHeaderVersion::BlockLinearColorKey; 265 header_version == TICHeaderVersion::BlockLinearColorKey;
262 } 266 }
263 267
264 bool IsLineal() const { 268 constexpr bool IsPitchLinear() const {
265 return header_version == TICHeaderVersion::Pitch || 269 return header_version == TICHeaderVersion::Pitch ||
266 header_version == TICHeaderVersion::PitchColorKey; 270 header_version == TICHeaderVersion::PitchColorKey;
267 } 271 }
268 272
269 bool IsBuffer() const { 273 constexpr bool IsBuffer() const {
270 return header_version == TICHeaderVersion::OneDBuffer; 274 return header_version == TICHeaderVersion::OneDBuffer;
271 } 275 }
272
273 bool IsSrgbConversionEnabled() const {
274 return srgb_conversion != 0;
275 }
276}; 276};
277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); 277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
278 278
@@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {
309 Linear = 3, 309 Linear = 3,
310}; 310};
311 311
312enum class SamplerReduction : u32 {
313 WeightedAverage = 0,
314 Min = 1,
315 Max = 2,
316};
317
312enum class Anisotropy { 318enum class Anisotropy {
313 Default, 319 Default,
314 Filter2x, 320 Filter2x,
@@ -333,8 +339,12 @@ struct TSCEntry {
333 BitField<0, 2, TextureFilter> mag_filter; 339 BitField<0, 2, TextureFilter> mag_filter;
334 BitField<4, 2, TextureFilter> min_filter; 340 BitField<4, 2, TextureFilter> min_filter;
335 BitField<6, 2, TextureMipmapFilter> mipmap_filter; 341 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
342 BitField<8, 1, u32> cubemap_anisotropy;
336 BitField<9, 1, u32> cubemap_interface_filtering; 343 BitField<9, 1, u32> cubemap_interface_filtering;
344 BitField<10, 2, SamplerReduction> reduction_filter;
337 BitField<12, 13, u32> mip_lod_bias; 345 BitField<12, 13, u32> mip_lod_bias;
346 BitField<25, 1, u32> float_coord_normalization;
347 BitField<26, 5, u32> trilin_opt;
338 }; 348 };
339 union { 349 union {
340 BitField<0, 12, u32> min_lod_clamp; 350 BitField<0, 12, u32> min_lod_clamp;
@@ -347,32 +357,45 @@ struct TSCEntry {
347 }; 357 };
348 std::array<f32, 4> border_color; 358 std::array<f32, 4> border_color;
349 }; 359 };
350 std::array<u8, 0x20> raw; 360 std::array<u64, 4> raw;
351 }; 361 };
352 362
353 std::array<float, 4> GetBorderColor() const noexcept; 363 constexpr bool operator==(const TSCEntry& rhs) const noexcept {
364 return raw == rhs.raw;
365 }
366
367 constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
368 return raw != rhs.raw;
369 }
370
371 std::array<float, 4> BorderColor() const noexcept;
354 372
355 float GetMaxAnisotropy() const noexcept; 373 float MaxAnisotropy() const noexcept;
356 374
357 float GetMinLod() const { 375 float MinLod() const {
358 return static_cast<float>(min_lod_clamp) / 256.0f; 376 return static_cast<float>(min_lod_clamp) / 256.0f;
359 } 377 }
360 378
361 float GetMaxLod() const { 379 float MaxLod() const {
362 return static_cast<float>(max_lod_clamp) / 256.0f; 380 return static_cast<float>(max_lod_clamp) / 256.0f;
363 } 381 }
364 382
365 float GetLodBias() const { 383 float LodBias() const {
366 // Sign extend the 13-bit value. 384 // Sign extend the 13-bit value.
367 constexpr u32 mask = 1U << (13 - 1); 385 static constexpr u32 mask = 1U << (13 - 1);
368 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; 386 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
369 } 387 }
370}; 388};
371static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 389static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
372 390
373struct FullTextureInfo { 391} // namespace Tegra::Texture
374 TICEntry tic; 392
375 TSCEntry tsc; 393template <>
394struct std::hash<Tegra::Texture::TICEntry> {
395 size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
376}; 396};
377 397
378} // namespace Tegra::Texture 398template <>
399struct std::hash<Tegra::Texture::TSCEntry> {
400 size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
401};
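LodBias above sign-extends the 13-bit fixed-point field with the
(x ^ mask) - mask identity before scaling by 1/256. A self-contained check of
that identity on a few 13-bit values:

    #include <cstdint>

    // (v ^ mask) - mask sign-extends a 13-bit two's-complement value.
    constexpr std::int32_t SignExtend13(std::uint32_t v) {
        constexpr std::uint32_t mask = 1U << 12;
        return static_cast<std::int32_t>((v ^ mask) - mask);
    }

    static_assert(SignExtend13(0x0001) == 1);
    static_assert(SignExtend13(0x1FFF) == -1);    // all ones -> -1
    static_assert(SignExtend13(0x1000) == -4096); // sign bit alone -> minimum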
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 837800bfe..53444e945 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -7,8 +7,6 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h" 8#include "core/core.h"
9#include "core/settings.h" 9#include "core/settings.h"
10#include "video_core/gpu_asynch.h"
11#include "video_core/gpu_synch.h"
12#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
13#include "video_core/renderer_opengl/renderer_opengl.h" 11#include "video_core/renderer_opengl/renderer_opengl.h"
14#include "video_core/renderer_vulkan/renderer_vulkan.h" 12#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -39,13 +37,9 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
39namespace VideoCore { 37namespace VideoCore {
40 38
41std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { 39std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
42 std::unique_ptr<Tegra::GPU> gpu;
43 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); 40 const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
44 if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 41 std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>(
45 gpu = std::make_unique<VideoCommon::GPUAsynch>(system, use_nvdec); 42 system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec);
46 } else {
47 gpu = std::make_unique<VideoCommon::GPUSynch>(system, use_nvdec);
48 }
49 43
50 auto context = emu_window.CreateSharedContext(); 44 auto context = emu_window.CreateSharedContext();
51 const auto scope = context->Acquire(); 45 const auto scope = context->Acquire();
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
index 5b01020ec..8d10ac29e 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp
@@ -32,20 +32,11 @@ namespace Vulkan {
32 32
33static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll"; 33static constexpr char AFTERMATH_LIB_NAME[] = "GFSDK_Aftermath_Lib.x64.dll";
34 34
35NsightAftermathTracker::NsightAftermathTracker() = default; 35NsightAftermathTracker::NsightAftermathTracker() {
36
37NsightAftermathTracker::~NsightAftermathTracker() {
38 if (initialized) {
39 (void)GFSDK_Aftermath_DisableGpuCrashDumps();
40 }
41}
42
43bool NsightAftermathTracker::Initialize() {
44 if (!dl.Open(AFTERMATH_LIB_NAME)) { 36 if (!dl.Open(AFTERMATH_LIB_NAME)) {
45 LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL"); 37 LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath DLL");
46 return false; 38 return;
47 } 39 }
48
49 if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps", 40 if (!dl.GetSymbol("GFSDK_Aftermath_DisableGpuCrashDumps",
50 &GFSDK_Aftermath_DisableGpuCrashDumps) || 41 &GFSDK_Aftermath_DisableGpuCrashDumps) ||
51 !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps", 42 !dl.GetSymbol("GFSDK_Aftermath_EnableGpuCrashDumps",
@@ -64,27 +55,28 @@ bool NsightAftermathTracker::Initialize() {
64 LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers"); 55 LOG_ERROR(Render_Vulkan, "Failed to load Nsight Aftermath function pointers");
65 return false; 56 return;
66 } 57 }
67
68 dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; 58 dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash";
69 59
70 (void)Common::FS::DeleteDirRecursively(dump_dir); 60 void(Common::FS::DeleteDirRecursively(dump_dir));
71 if (!Common::FS::CreateDir(dump_dir)) { 61 if (!Common::FS::CreateDir(dump_dir)) {
72 LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); 62 LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory");
73 return false; 63 return;
74 } 64 }
75
76 if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps( 65 if (!GFSDK_Aftermath_SUCCEED(GFSDK_Aftermath_EnableGpuCrashDumps(
77 GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan, 66 GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
78 GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback, 67 GFSDK_Aftermath_GpuCrashDumpFeatureFlags_Default, GpuCrashDumpCallback,
79 ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) { 68 ShaderDebugInfoCallback, CrashDumpDescriptionCallback, this))) {
80 LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed"); 69 LOG_ERROR(Render_Vulkan, "GFSDK_Aftermath_EnableGpuCrashDumps failed");
81 return false; 70 return;
82 } 71 }
83
84 LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir); 72 LOG_INFO(Render_Vulkan, "Nsight Aftermath dump directory is \"{}\"", dump_dir);
85
86 initialized = true; 73 initialized = true;
87 return true; 74}
75
76NsightAftermathTracker::~NsightAftermathTracker() {
77 if (initialized) {
78 (void)GFSDK_Aftermath_DisableGpuCrashDumps();
79 }
88} 80}
89 81
90void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { 82void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const {
diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
index afe7ae99e..cee3847fb 100644
--- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.h
+++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h
@@ -34,8 +34,6 @@ public:
     NsightAftermathTracker(NsightAftermathTracker&&) = delete;
     NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete;
 
-    bool Initialize();
-
     void SaveShader(const std::vector<u32>& spirv) const;
 
 private:
@@ -78,9 +76,6 @@ private:
 #ifndef HAS_NSIGHT_AFTERMATH
 inline NsightAftermathTracker::NsightAftermathTracker() = default;
 inline NsightAftermathTracker::~NsightAftermathTracker() = default;
-inline bool NsightAftermathTracker::Initialize() {
-    return false;
-}
 inline void NsightAftermathTracker::SaveShader(const std::vector<u32>&) const {}
 #endif
 
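The two hunks above replace the separate Initialize() step with a constructor that does all the work and records the outcome in `initialized`. A minimal, self-contained sketch of this construct-or-disarm pattern follows; every name in it is illustrative rather than yuzu's own:

#include <cstdio>

class CrashDumpTracker {
public:
    CrashDumpTracker() {
        if (!LoadLibraryAndSymbols()) {
            return; // Leave the object alive but disarmed; initialized stays false.
        }
        initialized = true;
    }

    ~CrashDumpTracker() {
        if (initialized) {
            DisableDumps(); // Only undo work that actually happened.
        }
    }

private:
    static bool LoadLibraryAndSymbols() { return false; } // Stub for the sketch.
    static void DisableDumps() {}

    bool initialized = false;
};

int main() {
    CrashDumpTracker tracker; // Safe to construct even when the library is absent.
    std::puts("tracker constructed");
}

Callers no longer branch on a boolean result; methods such as SaveShader() simply no-op when initialization did not complete.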
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
new file mode 100644
index 000000000..ea7af8ad4
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
@@ -0,0 +1,45 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string_view>
+#include "common/logging/log.h"
+#include "video_core/vulkan_common/vulkan_debug_callback.h"
+
+namespace Vulkan {
+namespace {
+VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+                  VkDebugUtilsMessageTypeFlagsEXT type,
+                  const VkDebugUtilsMessengerCallbackDataEXT* data,
+                  [[maybe_unused]] void* user_data) {
+    const std::string_view message{data->pMessage};
+    if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
+        LOG_CRITICAL(Render_Vulkan, "{}", message);
+    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
+        LOG_WARNING(Render_Vulkan, "{}", message);
+    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
+        LOG_INFO(Render_Vulkan, "{}", message);
+    } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
+        LOG_DEBUG(Render_Vulkan, "{}", message);
+    }
+    return VK_FALSE;
+}
+} // Anonymous namespace
+
+vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) {
+    return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .flags = 0,
+        .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
+        .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+                       VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+                       VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+        .pfnUserCallback = Callback,
+    });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h
new file mode 100644
index 000000000..2efcd244c
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.h
@@ -0,0 +1,11 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance);
+
+} // namespace Vulkan
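Callback() above maps Vulkan's severity flag bits onto the project's log levels, testing from most to least severe because messageSeverity is a bitmask. A standalone sketch of the same dispatch; the constants mirror VkDebugUtilsMessageSeverityFlagBitsEXT values but avoid a Vulkan SDK dependency, so treat them as illustrative:

#include <cstdint>
#include <cstdio>

constexpr std::uint32_t SEVERITY_VERBOSE = 0x00000001; // VERBOSE_BIT_EXT
constexpr std::uint32_t SEVERITY_INFO = 0x00000010;    // INFO_BIT_EXT
constexpr std::uint32_t SEVERITY_WARNING = 0x00000100; // WARNING_BIT_EXT
constexpr std::uint32_t SEVERITY_ERROR = 0x00001000;   // ERROR_BIT_EXT

const char* LevelFor(std::uint32_t severity) {
    // Highest severity wins, exactly like the if/else ladder in Callback().
    if (severity & SEVERITY_ERROR) {
        return "CRITICAL";
    }
    if (severity & SEVERITY_WARNING) {
        return "WARNING";
    }
    if (severity & SEVERITY_INFO) {
        return "INFO";
    }
    if (severity & SEVERITY_VERBOSE) {
        return "DEBUG";
    }
    return "UNKNOWN";
}

int main() {
    std::printf("%s\n", LevelFor(SEVERITY_WARNING)); // Prints WARNING.
}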
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index ce3846195..75173324e 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -13,8 +13,9 @@
 
 #include "common/assert.h"
 #include "core/settings.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/nsight_aftermath_tracker.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
@@ -46,6 +47,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
     VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
     VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
     VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
+    VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
     VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
     VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
     VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
@@ -122,6 +124,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
     VK_FORMAT_R16G16_UNORM,
     VK_FORMAT_R16G16_SNORM,
     VK_FORMAT_R16G16_SFLOAT,
+    VK_FORMAT_R16G16_SINT,
     VK_FORMAT_R16_UNORM,
     VK_FORMAT_R16_UINT,
     VK_FORMAT_R8G8B8A8_SRGB,
@@ -161,18 +164,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
     VK_FORMAT_BC2_SRGB_BLOCK,
     VK_FORMAT_BC3_SRGB_BLOCK,
     VK_FORMAT_BC7_SRGB_BLOCK,
+    VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
     VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
-    VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
-    VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+    VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
     VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
     VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
     VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
-    VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
-    VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+    VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+    VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
     VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
     VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+    VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
+    VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+    VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+    VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+    VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
+    VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+    VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
+    VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
+    VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
+    VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
+    VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+    VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
     VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
     VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+    VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
+    VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
     VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
     VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
     VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
@@ -190,17 +207,14 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
 
 } // Anonymous namespace
 
-VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,
-                   VkSurfaceKHR surface, const vk::InstanceDispatch& dld_)
-    : dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
-      instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {
+Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface,
+               const vk::InstanceDispatch& dld_)
+    : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
+      format_properties{GetFormatProperties(physical, dld)} {
+    CheckSuitability();
     SetupFamilies(surface);
     SetupFeatures();
-}
 
-VKDevice::~VKDevice() = default;
-
-bool VKDevice::Create() {
     const auto queue_cis = GetDeviceQueueCreateInfos();
     const std::vector extensions = LoadExtensions();
 
@@ -214,7 +228,7 @@ bool VKDevice::Create() {
     features2.features = {
         .robustBufferAccess = false,
         .fullDrawIndexUint32 = false,
-        .imageCubeArray = false,
+        .imageCubeArray = true,
         .independentBlend = true,
         .geometryShader = true,
         .tessellationShader = true,
@@ -242,7 +256,7 @@ bool VKDevice::Create() {
         .shaderTessellationAndGeometryPointSize = false,
         .shaderImageGatherExtended = true,
         .shaderStorageImageExtendedFormats = false,
-        .shaderStorageImageMultisample = false,
+        .shaderStorageImageMultisample = true,
         .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
         .shaderStorageImageWriteWithoutFormat = true,
         .shaderUniformBufferArrayDynamicIndexing = false,
@@ -268,7 +282,6 @@ bool VKDevice::Create() {
         .variableMultisampleRate = false,
         .inheritedQueries = false,
     };
-
     VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
         .pNext = nullptr,
@@ -380,13 +393,27 @@ bool VKDevice::Create() {
         LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
     }
 
+    VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
+    if (ext_robustness2) {
+        robustness2 = {
+            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
+            .pNext = nullptr,
+            .robustBufferAccess2 = false,
+            .robustImageAccess2 = true,
+            .nullDescriptor = true,
+        };
+        SetNext(next, robustness2);
+    } else {
+        LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
+    }
+
     if (!ext_depth_range_unrestricted) {
         LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
     }
 
     VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv;
     if (nv_device_diagnostics_config) {
-        nsight_aftermath_tracker.Initialize();
+        nsight_aftermath_tracker = std::make_unique<NsightAftermathTracker>();
 
         diagnostics_nv = {
             .sType = VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV,
@@ -397,15 +424,17 @@ bool VKDevice::Create() {
         };
         first_next = &diagnostics_nv;
     }
-
     logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
-    if (!logical) {
-        LOG_ERROR(Render_Vulkan, "Failed to create logical device");
-        return false;
-    }
 
     CollectTelemetryParameters();
+    CollectToolingInfo();
 
+    if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
+        LOG_WARNING(
+            Render_Vulkan,
+            "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
+        ext_extended_dynamic_state = false;
+    }
     if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
         // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
         // seems to cause stability issues
@@ -419,11 +448,12 @@ bool VKDevice::Create() {
     present_queue = logical.GetQueue(present_family);
 
     use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
-    return true;
 }
 
-VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
-                                      FormatType format_type) const {
+Device::~Device() = default;
+
+VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+                                    FormatType format_type) const {
     if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
         return wanted_format;
     }
@@ -454,18 +484,20 @@ VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFla
     return wanted_format;
 }
 
-void VKDevice::ReportLoss() const {
+void Device::ReportLoss() const {
     LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
 
     // Wait for the log to flush and for Nsight Aftermath to dump the results
-    std::this_thread::sleep_for(std::chrono::seconds{3});
+    std::this_thread::sleep_for(std::chrono::seconds{15});
 }
 
-void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
-    nsight_aftermath_tracker.SaveShader(spirv);
+void Device::SaveShader(const std::vector<u32>& spirv) const {
+    if (nsight_aftermath_tracker) {
+        nsight_aftermath_tracker->SaveShader(spirv);
+    }
 }
 
-bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
+bool Device::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
     // Disable for now to avoid converting ASTC twice.
     static constexpr std::array astc_formats = {
         VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
@@ -499,8 +531,18 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)
     return true;
 }
 
-bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
-                                 FormatType format_type) const {
+bool Device::TestDepthStencilBlits() const {
+    static constexpr VkFormatFeatureFlags required_features =
+        VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+    const auto test_features = [](VkFormatProperties props) {
+        return (props.optimalTilingFeatures & required_features) == required_features;
+    };
+    return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
+           test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
+}
+
+bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+                               FormatType format_type) const {
     const auto it = format_properties.find(wanted_format);
     if (it == format_properties.end()) {
         UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format);
@@ -510,65 +552,47 @@ bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wa
     return (supported_usage & wanted_usage) == wanted_usage;
 }
 
-bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
-    bool is_suitable = true;
+void Device::CheckSuitability() const {
     std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;
-
-    for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) {
+    for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) {
         for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
             if (available_extensions[i]) {
                 continue;
             }
-            const std::string_view name{prop.extensionName};
+            const std::string_view name{property.extensionName};
             available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
         }
     }
-    if (!available_extensions.all()) {
-        for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
-            if (available_extensions[i]) {
-                continue;
-            }
-            LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
-            is_suitable = false;
-        }
-    }
-
-    bool has_graphics{}, has_present{};
-    const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
-    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
-        const auto& family = queue_family_properties[i];
-        if (family.queueCount == 0) {
+    for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
+        if (available_extensions[i]) {
             continue;
         }
-        has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT;
-        has_present |= physical.GetSurfaceSupportKHR(i, surface);
+        LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
+        throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
     }
-    if (!has_graphics || !has_present) {
-        LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
-        is_suitable = false;
-    }
-
-    // TODO(Rodrigo): Check if the device matches all requeriments.
-    const auto properties{physical.GetProperties()};
-    const auto& limits{properties.limits};
-
-    constexpr u32 required_ubo_size = 65536;
-    if (limits.maxUniformBufferRange < required_ubo_size) {
-        LOG_ERROR(Render_Vulkan, "Device UBO size {} is too small, {} is required",
-                  limits.maxUniformBufferRange, required_ubo_size);
-        is_suitable = false;
-    }
-
-    constexpr u32 required_num_viewports = 16;
-    if (limits.maxViewports < required_num_viewports) {
-        LOG_INFO(Render_Vulkan, "Device number of viewports {} is too small, {} is required",
-                 limits.maxViewports, required_num_viewports);
-        is_suitable = false;
+    struct LimitTuple {
+        u32 minimum;
+        u32 value;
+        const char* name;
+    };
+    const VkPhysicalDeviceLimits& limits{properties.limits};
+    const std::array limits_report{
+        LimitTuple{65536, limits.maxUniformBufferRange, "maxUniformBufferRange"},
+        LimitTuple{16, limits.maxViewports, "maxViewports"},
+        LimitTuple{8, limits.maxColorAttachments, "maxColorAttachments"},
+        LimitTuple{8, limits.maxClipDistances, "maxClipDistances"},
+    };
+    for (const auto& tuple : limits_report) {
+        if (tuple.value < tuple.minimum) {
+            LOG_ERROR(Render_Vulkan, "{} has to be {} or greater but it is {}", tuple.name,
+                      tuple.minimum, tuple.value);
+            throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+        }
     }
-
-    const auto features{physical.GetFeatures()};
-    const std::array feature_report = {
+    const VkPhysicalDeviceFeatures features{physical.GetFeatures()};
+    const std::array feature_report{
         std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
+        std::make_pair(features.imageCubeArray, "imageCubeArray"),
         std::make_pair(features.independentBlend, "independentBlend"),
         std::make_pair(features.depthClamp, "depthClamp"),
         std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
@@ -580,25 +604,20 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
         std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
         std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
         std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
+        std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
         std::make_pair(features.shaderStorageImageWriteWithoutFormat,
                        "shaderStorageImageWriteWithoutFormat"),
     };
-    for (const auto& [supported, name] : feature_report) {
-        if (supported) {
+    for (const auto& [is_supported, name] : feature_report) {
+        if (is_supported) {
             continue;
         }
         LOG_ERROR(Render_Vulkan, "Missing required feature: {}", name);
-        is_suitable = false;
+        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
     }
-
-    if (!is_suitable) {
-        LOG_ERROR(Render_Vulkan, "{} is not suitable", properties.deviceName);
-    }
-
-    return is_suitable;
 }
 
-std::vector<const char*> VKDevice::LoadExtensions() {
+std::vector<const char*> Device::LoadExtensions() {
     std::vector<const char*> extensions;
     extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
     extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
@@ -608,6 +627,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
     bool has_ext_transform_feedback{};
     bool has_ext_custom_border_color{};
     bool has_ext_extended_dynamic_state{};
+    bool has_ext_robustness2{};
     for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
         const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
                               bool push) {
@@ -627,14 +647,16 @@ std::vector<const char*> VKDevice::LoadExtensions() {
         test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
         test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
         test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+        test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
         test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
              true);
+        test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
+        test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
         test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
         test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
         test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
-        if (instance_version >= VK_API_VERSION_1_1) {
-            test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
-        }
+        test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
+        test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
         if (Settings::values.renderer_debug) {
             test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME,
                  true);
@@ -733,41 +755,60 @@ std::vector<const char*> VKDevice::LoadExtensions() {
         }
     }
 
+    if (has_ext_robustness2) {
+        VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
+        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
+        robustness2.pNext = nullptr;
+        features.pNext = &robustness2;
+        physical.GetFeatures2KHR(features);
+        if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
+            extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
+            ext_robustness2 = true;
+        }
+    }
+
     return extensions;
 }
 
-void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
-    std::optional<u32> graphics_family_, present_family_;
-
+void Device::SetupFamilies(VkSurfaceKHR surface) {
     const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
-    for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
-        if (graphics_family_ && present_family_)
+    std::optional<u32> graphics;
+    std::optional<u32> present;
+    for (u32 index = 0; index < static_cast<u32>(queue_family_properties.size()); ++index) {
+        if (graphics && (present || !surface)) {
             break;
-
-        const auto& queue_family = queue_family_properties[i];
-        if (queue_family.queueCount == 0)
+        }
+        const VkQueueFamilyProperties& queue_family = queue_family_properties[index];
+        if (queue_family.queueCount == 0) {
             continue;
-
+        }
         if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
-            graphics_family_ = i;
+            graphics = index;
         }
-        if (physical.GetSurfaceSupportKHR(i, surface)) {
-            present_family_ = i;
+        if (surface && physical.GetSurfaceSupportKHR(index, surface)) {
+            present = index;
         }
     }
-    ASSERT(graphics_family_ && present_family_);
-
-    graphics_family = *graphics_family_;
-    present_family = *present_family_;
+    if (!graphics) {
+        LOG_ERROR(Render_Vulkan, "Device lacks a graphics queue");
+        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+    }
+    if (surface && !present) {
+        LOG_ERROR(Render_Vulkan, "Device lacks a present queue");
+        throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT);
+    }
+    graphics_family = *graphics;
+    present_family = *present;
 }
 
-void VKDevice::SetupFeatures() {
+void Device::SetupFeatures() {
     const auto supported_features{physical.GetFeatures()};
     is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
+    is_blit_depth_stencil_supported = TestDepthStencilBlits();
     is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
 }
 
-void VKDevice::CollectTelemetryParameters() {
+void Device::CollectTelemetryParameters() {
     VkPhysicalDeviceDriverPropertiesKHR driver{
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR,
         .pNext = nullptr,
@@ -794,7 +835,33 @@ void VKDevice::CollectTelemetryParameters() {
     }
 }
 
-std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
+void Device::CollectToolingInfo() {
+    if (!ext_tooling_info) {
+        return;
+    }
+    const auto vkGetPhysicalDeviceToolPropertiesEXT =
+        reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
+            dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
+    if (!vkGetPhysicalDeviceToolPropertiesEXT) {
+        return;
+    }
+    u32 tool_count = 0;
+    if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
+        return;
+    }
+    std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
+    if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
+        return;
+    }
+    for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
+        const std::string_view name = tool.name;
+        LOG_INFO(Render_Vulkan, "{}", name);
+        has_renderdoc = has_renderdoc || name == "RenderDoc";
+        has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
+    }
+}
+
+std::vector<VkDeviceQueueCreateInfo> Device::GetDeviceQueueCreateInfos() const {
     static constexpr float QUEUE_PRIORITY = 1.0f;
 
     std::unordered_set<u32> unique_queue_families{graphics_family, present_family};
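Several hunks above link optional feature structs (timeline_semaphore, robustness2, diagnostics_nv) into the VkDeviceCreateInfo pNext chain only when the matching extension was detected. A self-contained sketch of that chaining idiom; the struct shapes imitate Vulkan's sType/pNext convention and all names are illustrative:

#include <cstdio>

struct Chainable {
    int sType;
    void* pNext;
};

struct FeatureA { int sType = 1; void* pNext = nullptr; };
struct FeatureB { int sType = 2; void* pNext = nullptr; };

// Append 'data' at the current tail and advance the tail to data.pNext,
// mirroring SetNext(next, robustness2) in the diff above.
template <typename T>
void SetNext(void**& next, T& data) {
    *next = &data;
    next = &data.pNext;
}

int main() {
    void* first_next = nullptr; // Head of the chain (what pCreateInfo->pNext would see).
    void** next = &first_next;  // Tail pointer to append at.
    FeatureA a;
    FeatureB b;
    SetNext(next, a); // In the real code this runs only when the extension is present.
    SetNext(next, b);
    for (auto* p = static_cast<Chainable*>(first_next); p != nullptr;
         p = static_cast<Chainable*>(p->pNext)) {
        std::printf("sType=%d\n", p->sType); // Prints 1 then 2.
    }
}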
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 4286673d9..a973c3ce4 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,11 +10,12 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/nsight_aftermath_tracker.h"
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
 
+class NsightAftermathTracker;
+
 /// Format usage descriptor.
 enum class FormatType { Linear, Optimal, Buffer };
 
@@ -22,14 +23,11 @@ enum class FormatType { Linear, Optimal, Buffer };
 const u32 GuestWarpSize = 32;
 
 /// Handles data specific to a physical device.
-class VKDevice final {
+class Device final {
 public:
-    explicit VKDevice(VkInstance instance, u32 instance_version, vk::PhysicalDevice physical,
-                      VkSurfaceKHR surface, const vk::InstanceDispatch& dld);
-    ~VKDevice();
-
-    /// Initializes the device. Returns true on success.
-    bool Create();
+    explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
+                    const vk::InstanceDispatch& dld);
+    ~Device();
 
     /**
      * Returns a format supported by the device for the passed requeriments.
@@ -82,11 +80,6 @@ public:
         return present_family;
     }
 
-    /// Returns the current instance Vulkan API version in Vulkan-formatted version numbers.
-    u32 InstanceApiVersion() const {
-        return instance_version;
-    }
-
     /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
     u32 ApiVersion() const {
         return properties.apiVersion;
@@ -157,6 +150,11 @@ public:
         return is_formatless_image_load_supported;
     }
 
+    /// Returns true when blitting from and to depth stencil images is supported.
+    bool IsBlitDepthStencilSupported() const {
+        return is_blit_depth_stencil_supported;
+    }
+
     /// Returns true if the device supports VK_NV_viewport_swizzle.
     bool IsNvViewportSwizzleSupported() const {
         return nv_viewport_swizzle;
@@ -172,6 +170,11 @@ public:
         return ext_index_type_uint8;
     }
 
+    /// Returns true if the device supports VK_EXT_sampler_filter_minmax.
+    bool IsExtSamplerFilterMinmaxSupported() const {
+        return ext_sampler_filter_minmax;
+    }
+
     /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
     bool IsExtDepthRangeUnrestrictedSupported() const {
         return ext_depth_range_unrestricted;
@@ -197,6 +200,16 @@ public:
         return ext_extended_dynamic_state;
     }
 
+    /// Returns true if the device supports VK_EXT_shader_stencil_export.
+    bool IsExtShaderStencilExportSupported() const {
+        return ext_shader_stencil_export;
+    }
+
+    /// Returns true when a known debugging tool is attached.
+    bool HasDebuggingToolAttached() const {
+        return has_renderdoc || has_nsight_graphics;
+    }
+
     /// Returns the vendor name reported from Vulkan.
     std::string_view GetVendorName() const {
         return vendor_name;
@@ -212,10 +225,10 @@ public:
         return use_asynchronous_shaders;
     }
 
+private:
     /// Checks if the physical device is suitable.
-    static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
+    void CheckSuitability() const;
 
-private:
     /// Loads extensions into a vector and stores available ones in this object.
     std::vector<const char*> LoadExtensions();
 
@@ -228,16 +241,23 @@ private:
     /// Collects telemetry information from the device.
     void CollectTelemetryParameters();
 
+    /// Collects information about attached tools.
+    void CollectToolingInfo();
+
     /// Returns a list of queue initialization descriptors.
     std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
 
     /// Returns true if ASTC textures are natively supported.
     bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
 
+    /// Returns true if the device natively supports blitting depth stencil images.
+    bool TestDepthStencilBlits() const;
+
     /// Returns true if a format is supported.
     bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                            FormatType format_type) const;
 
+    VkInstance instance;                   ///< Vulkan instance.
     vk::DeviceDispatch dld;                ///< Device function pointers.
     vk::PhysicalDevice physical;           ///< Physical device.
     VkPhysicalDeviceProperties properties; ///< Device properties.
@@ -253,15 +273,22 @@ private:
     bool is_float16_supported{};               ///< Support for float16 arithmetics.
     bool is_warp_potentially_bigger{};         ///< Host warp size can be bigger than guest.
     bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
+    bool is_blit_depth_stencil_supported{};    ///< Support for blitting from and to depth stencil.
     bool nv_viewport_swizzle{};                ///< Support for VK_NV_viewport_swizzle.
     bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
     bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
+    bool ext_sampler_filter_minmax{};          ///< Support for VK_EXT_sampler_filter_minmax.
     bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.
     bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.
+    bool ext_tooling_info{};                   ///< Support for VK_EXT_tooling_info.
     bool ext_transform_feedback{};             ///< Support for VK_EXT_transform_feedback.
     bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
+    bool ext_robustness2{};                    ///< Support for VK_EXT_robustness2.
+    bool ext_shader_stencil_export{};          ///< Support for VK_EXT_shader_stencil_export.
     bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
+    bool has_renderdoc{};                      ///< Has RenderDoc attached
+    bool has_nsight_graphics{};                ///< Has Nsight Graphics attached
 
     // Asynchronous Graphics Pipeline setting
     bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
@@ -274,7 +301,7 @@ private:
     std::unordered_map<VkFormat, VkFormatProperties> format_properties;
 
     /// Nsight Aftermath GPU crash tracker
-    NsightAftermathTracker nsight_aftermath_tracker;
+    std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
 };
 
 } // namespace Vulkan
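With IsSuitable() and Create() removed, constructing a Device now either yields a fully initialized object or throws vk::Exception from CheckSuitability()/SetupFamilies(). A self-contained analogue of the resulting calling pattern, with std::runtime_error standing in for vk::Exception:

#include <cstdio>
#include <stdexcept>

class Device {
public:
    explicit Device(bool suitable) {
        // CheckSuitability() runs inside the constructor and throws on failure.
        if (!suitable) {
            throw std::runtime_error("missing required extension or feature");
        }
    }
};

int main() {
    try {
        Device device{false};
        std::puts("device ready"); // Reached only when construction fully succeeded.
    } catch (const std::exception& e) {
        std::printf("failed to create device: %s\n", e.what());
    }
}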
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
new file mode 100644
index 000000000..889ecda0c
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -0,0 +1,151 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <optional>
+#include <span>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/dynamic_library.h"
+#include "common/logging/log.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_instance.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+// Include these late to avoid polluting previous headers
+#ifdef _WIN32
+#include <windows.h>
+// ensure include order
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif
+
+namespace Vulkan {
+namespace {
+[[nodiscard]] std::vector<const char*> RequiredExtensions(
+    Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) {
+    std::vector<const char*> extensions;
+    extensions.reserve(6);
+    switch (window_type) {
+    case Core::Frontend::WindowSystemType::Headless:
+        break;
+#ifdef _WIN32
+    case Core::Frontend::WindowSystemType::Windows:
+        extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
+        break;
+#endif
+#if !defined(_WIN32) && !defined(__APPLE__)
+    case Core::Frontend::WindowSystemType::X11:
+        extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
+        break;
+    case Core::Frontend::WindowSystemType::Wayland:
+        extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
+        break;
+#endif
+    default:
+        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+        break;
+    }
+    if (window_type != Core::Frontend::WindowSystemType::Headless) {
+        extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
+    }
+    if (enable_debug_utils) {
+        extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+    }
+    extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
+    return extensions;
+}
+
+[[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld,
+                                          std::span<const char* const> extensions) {
+    const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
+    if (!properties) {
+        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
+        return false;
+    }
+    for (const char* extension : extensions) {
+        const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) {
+            return std::strcmp(extension, prop.extensionName) == 0;
+        });
+        if (it == properties->end()) {
+            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
+            return false;
+        }
+    }
+    return true;
+}
+
+[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) {
+    std::vector<const char*> layers;
+    if (enable_layers) {
+        layers.push_back("VK_LAYER_KHRONOS_validation");
+    }
+    return layers;
+}
+
+void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const char*>& layers) {
+    const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
+    if (!layer_properties) {
+        LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
+        layers.clear();
+    }
+    std::erase_if(layers, [&layer_properties](const char* layer) {
+        const auto comp = [layer](const VkLayerProperties& layer_property) {
+            return std::strcmp(layer, layer_property.layerName) == 0;
+        };
+        const auto it = std::ranges::find_if(*layer_properties, comp);
+        if (it == layer_properties->end()) {
+            LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
+            return true;
+        }
+        return false;
+    });
+}
+} // Anonymous namespace
+
+vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
+                            u32 required_version, Core::Frontend::WindowSystemType window_type,
+                            bool enable_debug_utils, bool enable_layers) {
+    if (!library.IsOpen()) {
+        LOG_ERROR(Render_Vulkan, "Vulkan library not available");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
+        LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    if (!vk::Load(dld)) {
+        LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils);
+    if (!AreExtensionsSupported(dld, extensions)) {
+        throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
+    }
+    std::vector<const char*> layers = Layers(enable_layers);
+    RemoveUnavailableLayers(dld, layers);
+
+    const u32 available_version = vk::AvailableVersion(dld);
+    if (available_version < required_version) {
+        LOG_ERROR(Render_Vulkan, "Vulkan {}.{} is not supported, {}.{} is required",
+                  VK_VERSION_MAJOR(available_version), VK_VERSION_MINOR(available_version),
+                  VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version));
+        throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
+    }
+    vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld);
+    if (!vk::Load(*instance, dld)) {
+        LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    return instance;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h
new file mode 100644
index 000000000..e5e3a7144
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_instance.h
@@ -0,0 +1,32 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "common/dynamic_library.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Vulkan {
+
+/**
+ * Create a Vulkan instance
+ *
+ * @param library            Dynamic library to load the Vulkan instance from
+ * @param dld                Dispatch table to load function pointers into
+ * @param required_version   Required Vulkan version (for example, VK_API_VERSION_1_1)
+ * @param window_type        Window system type's enabled extension
+ * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not
+ * @param enable_layers      Whether to enable Vulkan validation layers or not
+ *
+ * @return A new Vulkan instance
+ * @throw vk::Exception on failure
+ */
+[[nodiscard]] vk::Instance CreateInstance(
+    const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
+    Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
+    bool enable_debug_utils = false, bool enable_layers = false);
+
+} // namespace Vulkan
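A possible headless call site for CreateInstance(), leaning on the defaulted parameters documented above; the surrounding code is an assumption, since the actual caller is outside this diff:

Common::DynamicLibrary library = OpenLibrary();
vk::InstanceDispatch dld;
// Throws vk::Exception if the library, loader symbols, extensions, or version
// requirement are missing, so no null check is needed afterwards.
vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_1);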
diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp
new file mode 100644
index 000000000..557871d81
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_library.cpp
@@ -0,0 +1,36 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstdlib>
+#include <string>
+
+#include "common/dynamic_library.h"
+#include "common/file_util.h"
+#include "video_core/vulkan_common/vulkan_library.h"
+
+namespace Vulkan {
+
+Common::DynamicLibrary OpenLibrary() {
+    Common::DynamicLibrary library;
+#ifdef __APPLE__
+    // Check if a path to a specific Vulkan library has been specified.
+    char* const libvulkan_env = std::getenv("LIBVULKAN_PATH");
+    if (!libvulkan_env || !library.Open(libvulkan_env)) {
+        // Use the libvulkan.dylib from the application bundle.
+        const std::string filename =
+            Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
+        void(library.Open(filename.c_str()));
+    }
+#else
+    std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
+    if (!library.Open(filename.c_str())) {
+        // Android devices may not have libvulkan.so.1, only libvulkan.so.
+        filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
+        void(library.Open(filename.c_str()));
+    }
+#endif
+    return library;
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h
new file mode 100644
index 000000000..8b28b0e17
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_library.h
@@ -0,0 +1,13 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/dynamic_library.h"
+
+namespace Vulkan {
+
+Common::DynamicLibrary OpenLibrary();
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_surface.cpp b/src/video_core/vulkan_common/vulkan_surface.cpp
new file mode 100644
index 000000000..3c3238f96
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_surface.cpp
@@ -0,0 +1,81 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/frontend/emu_window.h"
+#include "video_core/vulkan_common/vulkan_surface.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+// Include these late to avoid polluting previous headers
+#ifdef _WIN32
+#include <windows.h>
+// ensure include order
+#include <vulkan/vulkan_win32.h>
+#endif
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+#include <X11/Xlib.h>
+#include <vulkan/vulkan_wayland.h>
+#include <vulkan/vulkan_xlib.h>
+#endif
+
+namespace Vulkan {
+
+vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
+                             const Core::Frontend::EmuWindow& emu_window) {
+    [[maybe_unused]] const vk::InstanceDispatch& dld = instance.Dispatch();
+    [[maybe_unused]] const auto& window_info = emu_window.GetWindowInfo();
+    VkSurfaceKHR unsafe_surface = nullptr;
+
+#ifdef _WIN32
+    if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
+        const HWND hWnd = static_cast<HWND>(window_info.render_surface);
+        const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
+                                                   nullptr, 0, nullptr, hWnd};
+        const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
+        if (!vkCreateWin32SurfaceKHR ||
+            vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
+            throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+        }
+    }
+#endif
+#if !defined(_WIN32) && !defined(__APPLE__)
+    if (window_info.type == Core::Frontend::WindowSystemType::X11) {
+        const VkXlibSurfaceCreateInfoKHR xlib_ci{
+            VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+            static_cast<Display*>(window_info.display_connection),
+            reinterpret_cast<Window>(window_info.render_surface)};
+        const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
+        if (!vkCreateXlibSurfaceKHR ||
+            vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
+            throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+        }
+    }
+    if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
+        const VkWaylandSurfaceCreateInfoKHR wayland_ci{
+            VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
+            static_cast<wl_display*>(window_info.display_connection),
+            static_cast<wl_surface*>(window_info.render_surface)};
+        const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
+            dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
+        if (!vkCreateWaylandSurfaceKHR ||
+            vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
+                VK_SUCCESS) {
+            LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
+            throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+        }
+    }
+#endif
+    if (!unsafe_surface) {
+        LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
+        throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
+    }
+    return vk::SurfaceKHR(unsafe_surface, *instance, dld);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_surface.h b/src/video_core/vulkan_common/vulkan_surface.h
new file mode 100644
index 000000000..05a169e32
--- /dev/null
+++ b/src/video_core/vulkan_common/vulkan_surface.h
@@ -0,0 +1,18 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/vulkan_common/vulkan_wrapper.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+}
+
+namespace Vulkan {
+
+[[nodiscard]] vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
+                                           const Core::Frontend::EmuWindow& emu_window);
+
+} // namespace Vulkan
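Taken together, the new vulkan_common files suggest a bring-up sequence along the following lines; emu_window and the try/catch placement are assumptions, as the real call site is not part of this diff:

try {
    Common::DynamicLibrary library = OpenLibrary();
    vk::InstanceDispatch dld;
    vk::Instance instance =
        CreateInstance(library, dld, VK_API_VERSION_1_1, emu_window.GetWindowInfo().type,
                       Settings::values.renderer_debug, false);
    vk::DebugUtilsMessenger messenger = CreateDebugCallback(instance);
    vk::SurfaceKHR surface = CreateSurface(instance, emu_window);
    // Physical device selection and Device construction would follow here.
} catch (const vk::Exception& exception) {
    LOG_ERROR(Render_Vulkan, "Vulkan initialization failed: {}", exception.what());
}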
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 1eced809e..5e15ad607 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -13,7 +13,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 
-#include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan::vk {
 
@@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCmdBeginQuery);
     X(vkCmdBeginRenderPass);
     X(vkCmdBeginTransformFeedbackEXT);
+    X(vkCmdBeginDebugUtilsLabelEXT);
     X(vkCmdBindDescriptorSets);
     X(vkCmdBindIndexBuffer);
     X(vkCmdBindPipeline);
@@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCmdEndQuery);
     X(vkCmdEndRenderPass);
     X(vkCmdEndTransformFeedbackEXT);
+    X(vkCmdEndDebugUtilsLabelEXT);
     X(vkCmdFillBuffer);
     X(vkCmdPipelineBarrier);
     X(vkCmdPushConstants);
@@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCmdSetPrimitiveTopologyEXT);
     X(vkCmdSetStencilOpEXT);
     X(vkCmdSetStencilTestEnableEXT);
+    X(vkCmdResolveImage);
     X(vkCreateBuffer);
     X(vkCreateBufferView);
     X(vkCreateCommandPool);
@@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkQueueSubmit);
     X(vkResetFences);
     X(vkResetQueryPoolEXT);
+    X(vkSetDebugUtilsObjectNameEXT);
+    X(vkSetDebugUtilsObjectTagEXT);
     X(vkUnmapMemory);
     X(vkUpdateDescriptorSetWithTemplateKHR);
     X(vkUpdateDescriptorSets);
@@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
184#undef X 189#undef X
185} 190}
186 191
192template <typename T>
193void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type,
194 const char* name) {
195 const VkDebugUtilsObjectNameInfoEXT name_info{
196 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
197 .pNext = nullptr,
198        .objectType = type,
199 .objectHandle = reinterpret_cast<u64>(handle),
200 .pObjectName = name,
201 };
202 Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
203}
204
187} // Anonymous namespace 205} // Anonymous namespace
188 206
189bool Load(InstanceDispatch& dld) noexcept { 207bool Load(InstanceDispatch& dld) noexcept {
@@ -417,7 +435,7 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe
417} 435}
418 436
419Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions, 437Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
420 InstanceDispatch& dispatch) noexcept { 438 InstanceDispatch& dispatch) {
421 const VkApplicationInfo application_info{ 439 const VkApplicationInfo application_info{
422 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, 440 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
423 .pNext = nullptr, 441 .pNext = nullptr,
@@ -437,66 +455,68 @@ Instance Instance::Create(u32 version, Span<const char*> layers, Span<const char
437 .enabledExtensionCount = extensions.size(), 455 .enabledExtensionCount = extensions.size(),
438 .ppEnabledExtensionNames = extensions.data(), 456 .ppEnabledExtensionNames = extensions.data(),
439 }; 457 };
440
441 VkInstance instance; 458 VkInstance instance;
442 if (dispatch.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { 459 Check(dispatch.vkCreateInstance(&ci, nullptr, &instance));
443 // Failed to create the instance.
444 return {};
445 }
446 if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) { 460 if (!Proc(dispatch.vkDestroyInstance, dispatch, "vkDestroyInstance", instance)) {
447 // We successfully created an instance but the destroy function couldn't be loaded. 461 // We successfully created an instance but the destroy function couldn't be loaded.
448 // This is a good moment to panic. 462 // This is a good moment to panic.
449 return {}; 463 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
450 } 464 }
451
452 return Instance(instance, dispatch); 465 return Instance(instance, dispatch);
453} 466}
454 467
455std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() { 468std::vector<VkPhysicalDevice> Instance::EnumeratePhysicalDevices() const {
456 u32 num; 469 u32 num;
457 if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { 470 Check(dld->vkEnumeratePhysicalDevices(handle, &num, nullptr));
458 return std::nullopt;
459 }
460 std::vector<VkPhysicalDevice> physical_devices(num); 471 std::vector<VkPhysicalDevice> physical_devices(num);
461 if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { 472 Check(dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()));
462 return std::nullopt;
463 }
464 SortPhysicalDevices(physical_devices, *dld); 473 SortPhysicalDevices(physical_devices, *dld);
465 return std::make_optional(std::move(physical_devices)); 474 return physical_devices;
466} 475}
467 476
468DebugCallback Instance::TryCreateDebugCallback( 477DebugUtilsMessenger Instance::CreateDebugUtilsMessenger(
469 PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { 478 const VkDebugUtilsMessengerCreateInfoEXT& create_info) const {
470 const VkDebugUtilsMessengerCreateInfoEXT ci{ 479 VkDebugUtilsMessengerEXT object;
471 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, 480 Check(dld->vkCreateDebugUtilsMessengerEXT(handle, &create_info, nullptr, &object));
472 .pNext = nullptr, 481 return DebugUtilsMessenger(object, handle, *dld);
473 .flags = 0,
474 .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
475 VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
476 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
477 VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
478 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
479 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
480 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
481 .pfnUserCallback = callback,
482 .pUserData = nullptr,
483 };
484
485 VkDebugUtilsMessengerEXT messenger;
486 if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) {
487 return {};
488 }
489 return DebugCallback(messenger, handle, *dld);
490} 482}
491 483
492void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { 484void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
493 Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); 485 Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
494} 486}
495 487
488void Buffer::SetObjectNameEXT(const char* name) const {
489 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
490}
491
492void BufferView::SetObjectNameEXT(const char* name) const {
493 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
494}
495
496void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { 496void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
497 Check(dld->vkBindImageMemory(owner, handle, memory, offset)); 497 Check(dld->vkBindImageMemory(owner, handle, memory, offset));
498} 498}
499 499
500void Image::SetObjectNameEXT(const char* name) const {
501 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
502}
503
504void ImageView::SetObjectNameEXT(const char* name) const {
505 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
506}
507
508void DeviceMemory::SetObjectNameEXT(const char* name) const {
509 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
510}
511
512void Fence::SetObjectNameEXT(const char* name) const {
513 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name);
514}
515
516void Framebuffer::SetObjectNameEXT(const char* name) const {
517 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name);
518}
519
500DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { 520DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
501 const std::size_t num = ai.descriptorSetCount; 521 const std::size_t num = ai.descriptorSetCount;
502 std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); 522 std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num);
@@ -510,6 +530,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c
510 } 530 }
511} 531}
512 532
533void DescriptorPool::SetObjectNameEXT(const char* name) const {
534 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name);
535}
536
513CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { 537CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
514 const VkCommandBufferAllocateInfo ai{ 538 const VkCommandBufferAllocateInfo ai{
515 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 539 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
@@ -530,6 +554,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev
530 } 554 }
531} 555}
532 556
557void CommandPool::SetObjectNameEXT(const char* name) const {
558 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name);
559}
560
533std::vector<VkImage> SwapchainKHR::GetImages() const { 561std::vector<VkImage> SwapchainKHR::GetImages() const {
534 u32 num; 562 u32 num;
535 Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); 563 Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr));
@@ -538,9 +566,21 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
538 return images; 566 return images;
539} 567}
540 568
569void Event::SetObjectNameEXT(const char* name) const {
570 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name);
571}
572
573void ShaderModule::SetObjectNameEXT(const char* name) const {
574 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
575}
576
577void Semaphore::SetObjectNameEXT(const char* name) const {
578 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
579}
580
541Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, 581Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
542 Span<const char*> enabled_extensions, const void* next, 582 Span<const char*> enabled_extensions, const void* next,
543 DeviceDispatch& dispatch) noexcept { 583 DeviceDispatch& dispatch) {
544 const VkDeviceCreateInfo ci{ 584 const VkDeviceCreateInfo ci{
545 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, 585 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
546 .pNext = next, 586 .pNext = next,
@@ -553,11 +593,8 @@ Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreate
553 .ppEnabledExtensionNames = enabled_extensions.data(), 593 .ppEnabledExtensionNames = enabled_extensions.data(),
554 .pEnabledFeatures = nullptr, 594 .pEnabledFeatures = nullptr,
555 }; 595 };
556
557 VkDevice device; 596 VkDevice device;
558 if (dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { 597 Check(dispatch.vkCreateDevice(physical_device, &ci, nullptr, &device));
559 return {};
560 }
561 Load(device, dispatch); 598 Load(device, dispatch);
562 return Device(device, dispatch); 599 return Device(device, dispatch);
563} 600}
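Reviewer note: the anonymous-namespace SetObjectName helper centralizes VK_EXT_debug_utils object naming, and each wrapped handle type forwards to it with its own VkObjectType. A hedged usage sketch; the handle variables are hypothetical, and the extension must be enabled for the calls to be valid:

    // Names become visible in RenderDoc captures and validation-layer messages.
    staging_buffer.SetObjectNameEXT("StagingBuffer");    // vk::Buffer
    render_finished.SetObjectNameEXT("RenderFinished");  // vk::Semaphore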
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 76f790eab..912cab46c 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -9,6 +9,7 @@
9#include <limits> 9#include <limits>
10#include <memory> 10#include <memory>
11#include <optional> 11#include <optional>
12#include <span>
12#include <type_traits> 13#include <type_traits>
13#include <utility> 14#include <utility>
14#include <vector> 15#include <vector>
@@ -18,6 +19,10 @@
18 19
19#include "common/common_types.h" 20#include "common/common_types.h"
20 21
22#ifdef _MSC_VER
23#pragma warning(disable : 26812) // Disable prefer enum class over enum
24#endif
25
21namespace Vulkan::vk { 26namespace Vulkan::vk {
22 27
23/** 28/**
@@ -41,6 +46,9 @@ public:
41 /// Construct an empty span. 46 /// Construct an empty span.
42 constexpr Span() noexcept = default; 47 constexpr Span() noexcept = default;
43 48
49 /// Construct an empty span from a null pointer.
50 constexpr Span(std::nullptr_t) noexcept {}
51
44 /// Construct a span from a single element. 52 /// Construct a span from a single element.
45 constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} 53 constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {}
46 54
@@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch {
177 PFN_vkCmdBeginQuery vkCmdBeginQuery; 185 PFN_vkCmdBeginQuery vkCmdBeginQuery;
178 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; 186 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass;
179 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; 187 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT;
188 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT;
180 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; 189 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;
181 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; 190 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;
182 PFN_vkCmdBindPipeline vkCmdBindPipeline; 191 PFN_vkCmdBindPipeline vkCmdBindPipeline;
@@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch {
194 PFN_vkCmdEndQuery vkCmdEndQuery; 203 PFN_vkCmdEndQuery vkCmdEndQuery;
195 PFN_vkCmdEndRenderPass vkCmdEndRenderPass; 204 PFN_vkCmdEndRenderPass vkCmdEndRenderPass;
196 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; 205 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT;
206 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT;
197 PFN_vkCmdFillBuffer vkCmdFillBuffer; 207 PFN_vkCmdFillBuffer vkCmdFillBuffer;
198 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; 208 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
199 PFN_vkCmdPushConstants vkCmdPushConstants; 209 PFN_vkCmdPushConstants vkCmdPushConstants;
@@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch {
217 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; 227 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;
218 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; 228 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;
219 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; 229 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;
230 PFN_vkCmdResolveImage vkCmdResolveImage;
220 PFN_vkCreateBuffer vkCreateBuffer; 231 PFN_vkCreateBuffer vkCreateBuffer;
221 PFN_vkCreateBufferView vkCreateBufferView; 232 PFN_vkCreateBufferView vkCreateBufferView;
222 PFN_vkCreateCommandPool vkCreateCommandPool; 233 PFN_vkCreateCommandPool vkCreateCommandPool;
@@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch {
272 PFN_vkQueueSubmit vkQueueSubmit; 283 PFN_vkQueueSubmit vkQueueSubmit;
273 PFN_vkResetFences vkResetFences; 284 PFN_vkResetFences vkResetFences;
274 PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; 285 PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT;
286 PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT;
287 PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT;
275 PFN_vkUnmapMemory vkUnmapMemory; 288 PFN_vkUnmapMemory vkUnmapMemory;
276 PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; 289 PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
277 PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; 290 PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
@@ -542,18 +555,14 @@ private:
542 const DeviceDispatch* dld = nullptr; 555 const DeviceDispatch* dld = nullptr;
543}; 556};
544 557
545using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>; 558using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
546using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
547using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; 559using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
548using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; 560using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>;
549using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>;
550using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>;
551using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; 561using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
552using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; 562using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
553using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; 563using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
554using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; 564using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
555using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; 565using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
556using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
557using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; 566using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
558 567
559using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; 568using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>;
@@ -564,16 +573,25 @@ class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> {
564 using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle; 573 using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle;
565 574
566public: 575public:
567 /// Creates a Vulkan instance. Use "operator bool" for error handling. 576 /// Creates a Vulkan instance.
577 /// @throw Exception on initialization error.
568 static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions, 578 static Instance Create(u32 version, Span<const char*> layers, Span<const char*> extensions,
569 InstanceDispatch& dispatch) noexcept; 579 InstanceDispatch& dispatch);
570 580
571 /// Enumerates physical devices. 581 /// Enumerates physical devices.
572 /// @return Physical devices and an empty handle on failure. 582 /// @return Physical devices.
573 std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices(); 583 /// @throw Exception on Vulkan error.
584 std::vector<VkPhysicalDevice> EnumeratePhysicalDevices() const;
585
586 /// Creates a debug callback messenger.
587 /// @throw Exception on creation failure.
588 DebugUtilsMessenger CreateDebugUtilsMessenger(
589 const VkDebugUtilsMessengerCreateInfoEXT& create_info) const;
574 590
575 /// Tries to create a debug callback messenger. Returns an empty handle on failure. 591 /// Returns dispatch table.
576 DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; 592 const InstanceDispatch& Dispatch() const noexcept {
593 return *dld;
594 }
577}; 595};
578 596
579class Queue { 597class Queue {
@@ -605,6 +623,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {
605public: 623public:
606 /// Attaches a memory allocation. 624 /// Attaches a memory allocation.
607 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; 625 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
626
627 /// Set object name.
628 void SetObjectNameEXT(const char* name) const;
629};
630
631class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> {
632 using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle;
633
634public:
635 /// Set object name.
636 void SetObjectNameEXT(const char* name) const;
608}; 637};
609 638
610class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { 639class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
@@ -613,12 +642,26 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
613public: 642public:
614 /// Attaches a memory allocation. 643 /// Attaches a memory allocation.
615 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; 644 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
645
646 /// Set object name.
647 void SetObjectNameEXT(const char* name) const;
648};
649
650class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> {
651 using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle;
652
653public:
654 /// Set object name.
655 void SetObjectNameEXT(const char* name) const;
616}; 656};
617 657
618class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { 658class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
619 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; 659 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
620 660
621public: 661public:
662 /// Set object name.
663 void SetObjectNameEXT(const char* name) const;
664
622 u8* Map(VkDeviceSize offset, VkDeviceSize size) const { 665 u8* Map(VkDeviceSize offset, VkDeviceSize size) const {
623 void* data; 666 void* data;
624 Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); 667 Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data));
@@ -634,6 +677,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> {
634 using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; 677 using Handle<VkFence, VkDevice, DeviceDispatch>::Handle;
635 678
636public: 679public:
680 /// Set object name.
681 void SetObjectNameEXT(const char* name) const;
682
637 VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { 683 VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept {
638 return dld->vkWaitForFences(owner, 1, &handle, true, timeout); 684 return dld->vkWaitForFences(owner, 1, &handle, true, timeout);
639 } 685 }
@@ -647,11 +693,22 @@ public:
647 } 693 }
648}; 694};
649 695
696class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> {
697 using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle;
698
699public:
700 /// Set object name.
701 void SetObjectNameEXT(const char* name) const;
702};
703
650class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { 704class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> {
651 using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; 705 using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle;
652 706
653public: 707public:
654 DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; 708 DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const;
709
710 /// Set object name.
711 void SetObjectNameEXT(const char* name) const;
655}; 712};
656 713
657class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { 714class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
@@ -660,6 +717,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
660public: 717public:
661 CommandBuffers Allocate(std::size_t num_buffers, 718 CommandBuffers Allocate(std::size_t num_buffers,
662 VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; 719 VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const;
720
721 /// Set object name.
722 void SetObjectNameEXT(const char* name) const;
663}; 723};
664 724
665class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { 725class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> {
@@ -673,15 +733,29 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
673 using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; 733 using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
674 734
675public: 735public:
736 /// Set object name.
737 void SetObjectNameEXT(const char* name) const;
738
676 VkResult GetStatus() const noexcept { 739 VkResult GetStatus() const noexcept {
677 return dld->vkGetEventStatus(owner, handle); 740 return dld->vkGetEventStatus(owner, handle);
678 } 741 }
679}; 742};
680 743
744class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> {
745 using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle;
746
747public:
748 /// Set object name.
749 void SetObjectNameEXT(const char* name) const;
750};
751
681class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { 752class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
682 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; 753 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
683 754
684public: 755public:
756 /// Set object name.
757 void SetObjectNameEXT(const char* name) const;
758
685 [[nodiscard]] u64 GetCounter() const { 759 [[nodiscard]] u64 GetCounter() const {
686 u64 value; 760 u64 value;
687 Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); 761 Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value));
@@ -722,7 +796,7 @@ class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
722public: 796public:
723 static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, 797 static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
724 Span<const char*> enabled_extensions, const void* next, 798 Span<const char*> enabled_extensions, const void* next,
725 DeviceDispatch& dispatch) noexcept; 799 DeviceDispatch& dispatch);
726 800
727 Queue GetQueue(u32 family_index) const noexcept; 801 Queue GetQueue(u32 family_index) const noexcept;
728 802
@@ -932,6 +1006,12 @@ public:
932 regions.data(), filter); 1006 regions.data(), filter);
933 } 1007 }
934 1008
1009 void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
1010 VkImageLayout dst_layout, Span<VkImageResolve> regions) const noexcept {
1011 dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
1012 regions.data());
1013 }
1014
935 void Dispatch(u32 x, u32 y, u32 z) const noexcept { 1015 void Dispatch(u32 x, u32 y, u32 z) const noexcept {
936 dld->vkCmdDispatch(handle, x, y, z); 1016 dld->vkCmdDispatch(handle, x, y, z);
937 } 1017 }
@@ -946,6 +1026,23 @@ public:
946 image_barriers.size(), image_barriers.data()); 1026 image_barriers.size(), image_barriers.data());
947 } 1027 }
948 1028
1029 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1030 VkDependencyFlags dependency_flags = 0) const noexcept {
1031 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {});
1032 }
1033
1034 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1035 VkDependencyFlags dependency_flags,
1036 const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
1037 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
1038 }
1039
1040 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1041 VkDependencyFlags dependency_flags,
1042 const VkImageMemoryBarrier& image_barrier) const noexcept {
1043 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier);
1044 }
1045
949 void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, 1046 void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout,
950 Span<VkBufferImageCopy> regions) const noexcept { 1047 Span<VkBufferImageCopy> regions) const noexcept {
951 dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), 1048 dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(),
@@ -979,6 +1076,13 @@ public:
979 dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); 1076 dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
980 } 1077 }
981 1078
1079 template <typename T>
1080 void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags,
1081 const T& data) const noexcept {
1082 static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable");
1083 dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data);
1084 }
1085
982 void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { 1086 void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
983 dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); 1087 dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
984 } 1088 }
@@ -1088,6 +1192,20 @@ public:
1088 counter_buffers, counter_buffer_offsets); 1192 counter_buffers, counter_buffer_offsets);
1089 } 1193 }
1090 1194
1195 void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept {
1196 const VkDebugUtilsLabelEXT label_info{
1197 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
1198 .pNext = nullptr,
1199 .pLabelName = label,
1200 .color{color[0], color[1], color[2], color[3]},
1201 };
1202 dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info);
1203 }
1204
1205 void EndDebugUtilsLabelEXT() const noexcept {
1206 dld->vkCmdEndDebugUtilsLabelEXT(handle);
1207 }
1208
1091private: 1209private:
1092 VkCommandBuffer handle; 1210 VkCommandBuffer handle;
1093 const DeviceDispatch* dld; 1211 const DeviceDispatch* dld;
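Reviewer note: the new CommandBuffer helpers (the barrier overloads, the templated PushConstants, and the debug labels) shorten common recording patterns. A hedged sketch of each in one place; cmdbuf, layout, and image_barrier are assumed to exist, and the stage masks, struct fields, and label text are illustrative:

    // Execution-only barrier through the new two-argument overload:
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                           VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);

    // Single image transition without hand-building a Span:
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                           VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier);

    // Push constants from a trivially copyable struct; offset and size are deduced:
    struct PushData {
        u32 width;
        u32 height;
    };
    const PushData data{1280, 720};
    cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, data);

    // Scoped label shown by debuggers such as RenderDoc:
    std::array<float, 4> color{0.0f, 1.0f, 0.0f, 1.0f};
    cmdbuf.BeginDebugUtilsLabelEXT("Example pass", color);
    // ... record commands ...
    cmdbuf.EndDebugUtilsLabelEXT();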
diff --git a/src/yuzu/applets/controller.cpp b/src/yuzu/applets/controller.cpp
index a15e8ca2a..c680fd2c2 100644
--- a/src/yuzu/applets/controller.cpp
+++ b/src/yuzu/applets/controller.cpp
@@ -535,7 +535,7 @@ void QtControllerSelectorDialog::UpdateControllerState(std::size_t player_index)
535 // This emulates a delay between disconnecting and reconnecting controllers as some games 535 // This emulates a delay between disconnecting and reconnecting controllers as some games
536 // do not respond to a change in controller type if it was instantaneous. 536 // do not respond to a change in controller type if it was instantaneous.
537 using namespace std::chrono_literals; 537 using namespace std::chrono_literals;
538 std::this_thread::sleep_for(20ms); 538 std::this_thread::sleep_for(60ms);
539 539
540 UpdateController(controller_type, player_index, player_connected); 540 UpdateController(controller_type, player_index, player_connected);
541} 541}
diff --git a/src/yuzu/applets/error.cpp b/src/yuzu/applets/error.cpp
index 53a993cf6..8ee03ddb3 100644
--- a/src/yuzu/applets/error.cpp
+++ b/src/yuzu/applets/error.cpp
@@ -19,7 +19,7 @@ QtErrorDisplay::~QtErrorDisplay() = default;
19void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const { 19void QtErrorDisplay::ShowError(ResultCode error, std::function<void()> finished) const {
20 callback = std::move(finished); 20 callback = std::move(finished);
21 emit MainWindowDisplayError( 21 emit MainWindowDisplayError(
22 tr("An error has occured.\nPlease try again or contact the developer of the " 22 tr("An error has occurred.\nPlease try again or contact the developer of the "
23 "software.\n\nError Code: %1-%2 (0x%3)") 23 "software.\n\nError Code: %1-%2 (0x%3)")
24 .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) 24 .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
25 .arg(error.description, 4, 10, QChar::fromLatin1('0')) 25 .arg(error.description, 4, 10, QChar::fromLatin1('0'))
@@ -32,7 +32,7 @@ void QtErrorDisplay::ShowErrorWithTimestamp(ResultCode error, std::chrono::secon
32 32
33 const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count()); 33 const QDateTime date_time = QDateTime::fromSecsSinceEpoch(time.count());
34 emit MainWindowDisplayError( 34 emit MainWindowDisplayError(
35 tr("An error occured on %1 at %2.\nPlease try again or contact the " 35 tr("An error occurred on %1 at %2.\nPlease try again or contact the "
36 "developer of the software.\n\nError Code: %3-%4 (0x%5)") 36 "developer of the software.\n\nError Code: %3-%4 (0x%5)")
37 .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy"))) 37 .arg(date_time.toString(QStringLiteral("dddd, MMMM d, yyyy")))
38 .arg(date_time.toString(QStringLiteral("h:mm:ss A"))) 38 .arg(date_time.toString(QStringLiteral("h:mm:ss A")))
@@ -46,7 +46,7 @@ void QtErrorDisplay::ShowCustomErrorText(ResultCode error, std::string dialog_te
46 std::function<void()> finished) const { 46 std::function<void()> finished) const {
47 callback = std::move(finished); 47 callback = std::move(finished);
48 emit MainWindowDisplayError( 48 emit MainWindowDisplayError(
49 tr("An error has occured.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5") 49 tr("An error has occurred.\nError Code: %1-%2 (0x%3)\n\n%4\n\n%5")
50 .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0')) 50 .arg(static_cast<u32>(error.module.Value()) + 2000, 4, 10, QChar::fromLatin1('0'))
51 .arg(error.description, 4, 10, QChar::fromLatin1('0')) 51 .arg(error.description, 4, 10, QChar::fromLatin1('0'))
52 .arg(error.raw, 8, 16, QChar::fromLatin1('0')) 52 .arg(error.raw, 8, 16, QChar::fromLatin1('0'))
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index e124836b5..85ee2577d 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -397,7 +397,7 @@ void GRenderWindow::mousePressEvent(QMouseEvent* event) {
397 this->TouchPressed(x, y); 397 this->TouchPressed(x, y);
398 } 398 }
399 399
400 QWidget::mousePressEvent(event); 400 emit MouseActivity();
401} 401}
402 402
403void GRenderWindow::mouseMoveEvent(QMouseEvent* event) { 403void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
@@ -411,7 +411,7 @@ void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
411 input_subsystem->GetMouse()->MouseMove(x, y); 411 input_subsystem->GetMouse()->MouseMove(x, y);
412 this->TouchMoved(x, y); 412 this->TouchMoved(x, y);
413 413
414 QWidget::mouseMoveEvent(event); 414 emit MouseActivity();
415} 415}
416 416
417void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) { 417void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) {
@@ -688,3 +688,10 @@ void GRenderWindow::showEvent(QShowEvent* event) {
688 connect(windowHandle(), &QWindow::screenChanged, this, &GRenderWindow::OnFramebufferSizeChanged, 688 connect(windowHandle(), &QWindow::screenChanged, this, &GRenderWindow::OnFramebufferSizeChanged,
689 Qt::UniqueConnection); 689 Qt::UniqueConnection);
690} 690}
691
692bool GRenderWindow::eventFilter(QObject* object, QEvent* event) {
693 if (event->type() == QEvent::HoverMove) {
694 emit MouseActivity();
695 }
696 return false;
697}
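Reviewer note on the Qt mechanics: QEvent::HoverMove is only synthesized for widgets carrying the Qt::WA_Hover attribute, an event filter only runs for objects it was installed on, and returning false leaves normal event delivery untouched. The wiring, condensed from the main.cpp hunk later in this patch (OnMouseActivity presumably resets the cursor-hide state):

    render_window->setAttribute(Qt::WA_Hover, true);   // synthesize hover events
    render_window->installEventFilter(render_window);  // the window filters itself
    connect(render_window, &GRenderWindow::MouseActivity,
            this, &GMainWindow::OnMouseActivity);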
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index ebe5cb965..339095509 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -184,6 +184,7 @@ signals:
184 void Closed(); 184 void Closed();
185 void FirstFrameDisplayed(); 185 void FirstFrameDisplayed();
186 void ExecuteProgramSignal(std::size_t program_index); 186 void ExecuteProgramSignal(std::size_t program_index);
187 void MouseActivity();
187 188
188private: 189private:
189 void TouchBeginEvent(const QTouchEvent* event); 190 void TouchBeginEvent(const QTouchEvent* event);
@@ -216,4 +217,5 @@ private:
216 217
217protected: 218protected:
218 void showEvent(QShowEvent* event) override; 219 void showEvent(QShowEvent* event) override;
220 bool eventFilter(QObject* object, QEvent* event) override;
219}; 221};
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp
index 649912557..a470056ef 100644
--- a/src/yuzu/compatdb.cpp
+++ b/src/yuzu/compatdb.cpp
@@ -72,7 +72,7 @@ void CompatDB::Submit() {
72void CompatDB::OnTestcaseSubmitted() { 72void CompatDB::OnTestcaseSubmitted() {
73 if (!testcase_watcher.result()) { 73 if (!testcase_watcher.result()) {
74 QMessageBox::critical(this, tr("Communication error"), 74 QMessageBox::critical(this, tr("Communication error"),
75 tr("An error occured while sending the Testcase")); 75 tr("An error occurred while sending the Testcase"));
76 button(NextButton)->setEnabled(true); 76 button(NextButton)->setEnabled(true);
77 button(NextButton)->setText(tr("Next")); 77 button(NextButton)->setText(tr("Next"));
78 button(CancelButton)->setVisible(true); 78 button(CancelButton)->setVisible(true);
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 9fb254986..cda448718 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -514,7 +514,7 @@ void Config::ReadControlValues() {
514 Settings::values.emulate_analog_keyboard = 514 Settings::values.emulate_analog_keyboard =
515 ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool(); 515 ReadSetting(QStringLiteral("emulate_analog_keyboard"), false).toBool();
516 516
517 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), false); 517 ReadSettingGlobal(Settings::values.use_docked_mode, QStringLiteral("use_docked_mode"), true);
518 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"), 518 ReadSettingGlobal(Settings::values.vibration_enabled, QStringLiteral("vibration_enabled"),
519 true); 519 true);
520 ReadSettingGlobal(Settings::values.enable_accurate_vibrations, 520 ReadSettingGlobal(Settings::values.enable_accurate_vibrations,
@@ -764,6 +764,8 @@ void Config::ReadCpuValues() {
764 ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool(); 764 ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool();
765 Settings::values.cpuopt_unsafe_reduce_fp_error = 765 Settings::values.cpuopt_unsafe_reduce_fp_error =
766 ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool(); 766 ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool();
767 Settings::values.cpuopt_unsafe_inaccurate_nan =
768 ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool();
767 } 769 }
768 770
769 qt_config->endGroup(); 771 qt_config->endGroup();
@@ -1174,7 +1176,7 @@ void Config::SaveControlValues() {
1174 SaveTouchscreenValues(); 1176 SaveTouchscreenValues();
1175 SaveMotionTouchValues(); 1177 SaveMotionTouchValues();
1176 1178
1177 WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); 1179 WriteSettingGlobal(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, true);
1178 WriteSettingGlobal(QStringLiteral("vibration_enabled"), Settings::values.vibration_enabled, 1180 WriteSettingGlobal(QStringLiteral("vibration_enabled"), Settings::values.vibration_enabled,
1179 true); 1181 true);
1180 WriteSettingGlobal(QStringLiteral("enable_accurate_vibrations"), 1182 WriteSettingGlobal(QStringLiteral("enable_accurate_vibrations"),
@@ -1327,6 +1329,8 @@ void Config::SaveCpuValues() {
1327 Settings::values.cpuopt_unsafe_unfuse_fma, true); 1329 Settings::values.cpuopt_unsafe_unfuse_fma, true);
1328 WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), 1330 WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"),
1329 Settings::values.cpuopt_unsafe_reduce_fp_error, true); 1331 Settings::values.cpuopt_unsafe_reduce_fp_error, true);
1332 WriteSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
1333 Settings::values.cpuopt_unsafe_inaccurate_nan, true);
1330 } 1334 }
1331 1335
1332 qt_config->endGroup(); 1336 qt_config->endGroup();
@@ -1589,14 +1593,12 @@ void Config::WriteSettingGlobal(const QString& name, const QVariant& value, bool
1589 1593
1590void Config::Reload() { 1594void Config::Reload() {
1591 ReadValues(); 1595 ReadValues();
1592 Settings::Sanitize();
1593 // To apply default value changes 1596 // To apply default value changes
1594 SaveValues(); 1597 SaveValues();
1595 Settings::Apply(Core::System::GetInstance()); 1598 Settings::Apply(Core::System::GetInstance());
1596} 1599}
1597 1600
1598void Config::Save() { 1601void Config::Save() {
1599 Settings::Sanitize();
1600 SaveValues(); 1602 SaveValues();
1601} 1603}
1602 1604
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index 37fcd6adc..d055cbd60 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -36,6 +36,8 @@ void ConfigureCpu::SetConfiguration() {
36 ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma); 36 ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma);
37 ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); 37 ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
38 ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error); 38 ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error);
39 ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
40 ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan);
39} 41}
40 42
41void ConfigureCpu::AccuracyUpdated(int index) { 43void ConfigureCpu::AccuracyUpdated(int index) {
@@ -61,6 +63,7 @@ void ConfigureCpu::ApplyConfiguration() {
61 static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex()); 63 static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex());
62 Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked(); 64 Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked();
63 Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked(); 65 Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked();
66 Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked();
64} 67}
65 68
66void ConfigureCpu::changeEvent(QEvent* event) { 69void ConfigureCpu::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index ebdd2e6e9..bcd0962e9 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -109,6 +109,18 @@
109 </property> 109 </property>
110 </widget> 110 </widget>
111 </item> 111 </item>
112 <item>
113 <widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan">
114 <property name="text">
115 <string>Inaccurate NaN handling</string>
116 </property>
117 <property name="toolTip">
118 <string>
119 &lt;div&gt;This option improves speed by removing NaN checking. Please note this also reduces accuracy of certain floating-point instructions.&lt;/div&gt;
120 </string>
121 </property>
122 </widget>
123 </item>
112 </layout> 124 </layout>
113 </widget> 125 </widget>
114 </item> 126 </item>
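Reviewer note: the new cpuopt_unsafe_inaccurate_nan flag follows the same three-step plumbing as the existing unsafe CPU options, condensed here from the hunks above (all names are from this patch; the persisted default is true):

    // config.cpp: load and persist the value.
    Settings::values.cpuopt_unsafe_inaccurate_nan =
        ReadSetting(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true).toBool();

    // configure_cpu.cpp: mirror it into the checkbox when the dialog opens...
    ui->cpuopt_unsafe_inaccurate_nan->setChecked(Settings::values.cpuopt_unsafe_inaccurate_nan);

    // ...and read it back when the configuration is applied.
    Settings::values.cpuopt_unsafe_inaccurate_nan = ui->cpuopt_unsafe_inaccurate_nan->isChecked();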
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp
index d9009091b..567a36d9b 100644
--- a/src/yuzu/configuration/configure_input.cpp
+++ b/src/yuzu/configuration/configure_input.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <memory> 6#include <memory>
7#include <thread>
7 8
8#include <QSignalBlocker> 9#include <QSignalBlocker>
9#include <QTimer> 10#include <QTimer>
@@ -181,8 +182,18 @@ QList<QWidget*> ConfigureInput::GetSubTabs() const {
181} 182}
182 183
183void ConfigureInput::ApplyConfiguration() { 184void ConfigureInput::ApplyConfiguration() {
184 for (auto controller : player_controllers) { 185 for (auto* controller : player_controllers) {
185 controller->ApplyConfiguration(); 186 controller->ApplyConfiguration();
187 controller->TryDisconnectSelectedController();
188 }
189
190 // This emulates a delay between disconnecting and reconnecting controllers as some games
191 // do not respond to a change in controller type if it was instantaneous.
192 using namespace std::chrono_literals;
193 std::this_thread::sleep_for(60ms);
194
195 for (auto* controller : player_controllers) {
196 controller->TryConnectSelectedController();
186 } 197 }
187 198
188 advanced->ApplyConfiguration(); 199 advanced->ApplyConfiguration();
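Design note: the disconnect and reconnect loops are split so the settle delay is paid once for the whole set of players rather than once per controller; the per-player path removed below slept inside every ApplyConfiguration call. The sequence, annotated:

    for (auto* controller : player_controllers) {
        controller->ApplyConfiguration();              // persist new bindings
        controller->TryDisconnectSelectedController(); // detach only if changed
    }
    std::this_thread::sleep_for(60ms);                 // one shared settle delay
    for (auto* controller : player_controllers) {
        controller->TryConnectSelectedController();    // reattach with new types
    }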
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index f9915fb7a..46ea026e4 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -4,7 +4,6 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <memory> 6#include <memory>
7#include <thread>
8#include <utility> 7#include <utility>
9#include <QGridLayout> 8#include <QGridLayout>
10#include <QInputDialog> 9#include <QInputDialog>
@@ -173,61 +172,31 @@ QString AnalogToText(const Common::ParamPackage& param, const std::string& dir)
173 return ButtonToText(Common::ParamPackage{param.Get(dir, "")}); 172 return ButtonToText(Common::ParamPackage{param.Get(dir, "")});
174 } 173 }
175 174
176 if (param.Get("engine", "") == "sdl") { 175 const auto engine_str = param.Get("engine", "");
177 if (dir == "modifier") { 176 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", ""));
178 return QObject::tr("[unused]"); 177 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", ""));
179 } 178 const bool invert_x = param.Get("invert_x", "+") == "-";
180 179 const bool invert_y = param.Get("invert_y", "+") == "-";
181 if (dir == "left" || dir == "right") { 180 if (engine_str == "sdl" || engine_str == "gcpad" || engine_str == "mouse") {
182 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", ""));
183
184 return QObject::tr("Axis %1").arg(axis_x_str);
185 }
186
187 if (dir == "up" || dir == "down") {
188 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", ""));
189
190 return QObject::tr("Axis %1").arg(axis_y_str);
191 }
192
193 return {};
194 }
195
196 if (param.Get("engine", "") == "gcpad") {
197 if (dir == "modifier") { 181 if (dir == "modifier") {
198 return QObject::tr("[unused]"); 182 return QObject::tr("[unused]");
199 } 183 }
200 184
201 if (dir == "left" || dir == "right") { 185 if (dir == "left") {
202 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); 186 const QString invert_x_str = QString::fromStdString(invert_x ? "+" : "-");
203 187 return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str);
204 return QObject::tr("GC Axis %1").arg(axis_x_str);
205 } 188 }
206 189 if (dir == "right") {
207 if (dir == "up" || dir == "down") { 190 const QString invert_x_str = QString::fromStdString(invert_x ? "-" : "+");
208 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); 191 return QObject::tr("Axis %1%2").arg(axis_x_str, invert_x_str);
209
210 return QObject::tr("GC Axis %1").arg(axis_y_str);
211 }
212
213 return {};
214 }
215
216 if (param.Get("engine", "") == "mouse") {
217 if (dir == "modifier") {
218 return QObject::tr("[unused]");
219 } 192 }
220 193 if (dir == "up") {
221 if (dir == "left" || dir == "right") { 194 const QString invert_y_str = QString::fromStdString(invert_y ? "-" : "+");
222 const QString axis_x_str = QString::fromStdString(param.Get("axis_x", "")); 195 return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str);
223
224 return QObject::tr("Mouse %1").arg(axis_x_str);
225 } 196 }
226 197 if (dir == "down") {
227 if (dir == "up" || dir == "down") { 198 const QString invert_y_str = QString::fromStdString(invert_y ? "+" : "-");
228 const QString axis_y_str = QString::fromStdString(param.Get("axis_y", "")); 199 return QObject::tr("Axis %1%2").arg(axis_y_str, invert_y_str);
229
230 return QObject::tr("Mouse %1").arg(axis_y_str);
231 } 200 }
232 201
233 return {}; 202 return {};
@@ -396,6 +365,25 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
396 analogs_param[analog_id].Clear(); 365 analogs_param[analog_id].Clear();
397 analog_map_buttons[analog_id][sub_button_id]->setText(tr("[not set]")); 366 analog_map_buttons[analog_id][sub_button_id]->setText(tr("[not set]"));
398 }); 367 });
368 context_menu.addAction(tr("Invert axis"), [&] {
369 if (sub_button_id == 2 || sub_button_id == 3) {
370 const bool invert_value =
371 analogs_param[analog_id].Get("invert_x", "+") == "-";
372 const std::string invert_str = invert_value ? "+" : "-";
373 analogs_param[analog_id].Set("invert_x", invert_str);
374 }
375 if (sub_button_id == 0 || sub_button_id == 1) {
376 const bool invert_value =
377 analogs_param[analog_id].Get("invert_y", "+") == "-";
378 const std::string invert_str = invert_value ? "+" : "-";
379 analogs_param[analog_id].Set("invert_y", invert_str);
380 }
381 for (int sub_button_id = 0; sub_button_id < ANALOG_SUB_BUTTONS_NUM;
382 ++sub_button_id) {
383 analog_map_buttons[analog_id][sub_button_id]->setText(AnalogToText(
384 analogs_param[analog_id], analog_sub_buttons[sub_button_id]));
385 }
386 });
399 context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal( 387 context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal(
400 menu_location)); 388 menu_location));
401 }); 389 });
@@ -587,6 +575,10 @@ void ConfigureInputPlayer::ApplyConfiguration() {
587 575
588 std::transform(motions_param.begin(), motions_param.end(), motions.begin(), 576 std::transform(motions_param.begin(), motions_param.end(), motions.begin(),
589 [](const Common::ParamPackage& param) { return param.Serialize(); }); 577 [](const Common::ParamPackage& param) { return param.Serialize(); });
578}
579
580void ConfigureInputPlayer::TryConnectSelectedController() {
581 auto& player = Settings::values.players.GetValue()[player_index];
590 582
591 const auto controller_type = 583 const auto controller_type =
592 GetControllerTypeFromIndex(ui->comboControllerType->currentIndex()); 584 GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
@@ -599,15 +591,12 @@ void ConfigureInputPlayer::ApplyConfiguration() {
599 return; 591 return;
600 } 592 }
601 593
602 // Disconnect the controller first.
603 UpdateController(controller_type, player_index, false);
604
605 player.controller_type = controller_type; 594 player.controller_type = controller_type;
606 player.connected = player_connected; 595 player.connected = player_connected;
607 596
608 ConfigureVibration::SetVibrationDevices(player_index); 597 ConfigureVibration::SetVibrationDevices(player_index);
609 598
610 // Handheld 599 // Connect/Disconnect Handheld depending on Player 1's controller configuration.
611 if (player_index == 0) { 600 if (player_index == 0) {
612 auto& handheld = Settings::values.players.GetValue()[HANDHELD_INDEX]; 601 auto& handheld = Settings::values.players.GetValue()[HANDHELD_INDEX];
613 if (controller_type == Settings::ControllerType::Handheld) { 602 if (controller_type == Settings::ControllerType::Handheld) {
@@ -622,14 +611,26 @@ void ConfigureInputPlayer::ApplyConfiguration() {
622 return; 611 return;
623 } 612 }
624 613
625 // This emulates a delay between disconnecting and reconnecting controllers as some games
626 // do not respond to a change in controller type if it was instantaneous.
627 using namespace std::chrono_literals;
628 std::this_thread::sleep_for(20ms);
629
630 UpdateController(controller_type, player_index, player_connected); 614 UpdateController(controller_type, player_index, player_connected);
631} 615}
632 616
617void ConfigureInputPlayer::TryDisconnectSelectedController() {
618 const auto& player = Settings::values.players.GetValue()[player_index];
619
620 const auto controller_type =
621 GetControllerTypeFromIndex(ui->comboControllerType->currentIndex());
622 const auto player_connected = ui->groupConnectedController->isChecked() &&
623 controller_type != Settings::ControllerType::Handheld;
624
625 // Do not do anything if the controller configuration has not changed.
626 if (player.controller_type == controller_type && player.connected == player_connected) {
627 return;
628 }
629
630 // Disconnect the controller first.
631 UpdateController(controller_type, player_index, false);
632}
633
633void ConfigureInputPlayer::showEvent(QShowEvent* event) { 634void ConfigureInputPlayer::showEvent(QShowEvent* event) {
634 if (bottom_row == nullptr) { 635 if (bottom_row == nullptr) {
635 return; 636 return;
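Reviewer note: axis inversion is stored in the input ParamPackage as a sign string, "+" for normal and "-" for inverted, not as a boolean. A round-trip sketch of the convention the "Invert axis" context-menu action above relies on (values illustrative):

    Common::ParamPackage param;
    param.Set("invert_x", "-");                               // mark the X axis inverted
    const bool inverted = param.Get("invert_x", "+") == "-";  // reads back true
    param.Set("invert_x", inverted ? "+" : "-");              // toggle back to normal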
diff --git a/src/yuzu/configuration/configure_input_player.h b/src/yuzu/configuration/configure_input_player.h
index 9c30879a2..c4ae50de7 100644
--- a/src/yuzu/configuration/configure_input_player.h
+++ b/src/yuzu/configuration/configure_input_player.h
@@ -54,6 +54,18 @@ public:
54 /// Save all button configurations to settings file. 54 /// Save all button configurations to settings file.
55 void ApplyConfiguration(); 55 void ApplyConfiguration();
56 56
57 /**
58 * Attempts to connect the currently selected controller in the HID backend.
59 * This function will not do anything if it is not connected in the frontend.
60 */
61 void TryConnectSelectedController();
62
63 /**
64 * Attempts to disconnect the currently selected controller in the HID backend.
65 * This function will not do anything if the configuration has not changed.
66 */
67 void TryDisconnectSelectedController();
68
57 /// Set the connection state checkbox (used to sync state). 69 /// Set the connection state checkbox (used to sync state).
58 void ConnectPlayer(bool connected); 70 void ConnectPlayer(bool connected);
59 71
diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp
index c2a7113da..eb8eacbf9 100644
--- a/src/yuzu/configuration/configure_motion_touch.cpp
+++ b/src/yuzu/configuration/configure_motion_touch.cpp
@@ -51,6 +51,8 @@ CalibrationConfigurationDialog::CalibrationConfigurationDialog(QWidget* parent,
51 case CalibrationConfigurationJob::Status::Completed: 51 case CalibrationConfigurationJob::Status::Completed:
52 text = tr("Configuration completed!"); 52 text = tr("Configuration completed!");
53 break; 53 break;
54 default:
55 break;
54 } 56 }
55 QMetaObject::invokeMethod(this, "UpdateLabelText", Q_ARG(QString, text)); 57 QMetaObject::invokeMethod(this, "UpdateLabelText", Q_ARG(QString, text));
56 if (status == CalibrationConfigurationJob::Status::Completed) { 58 if (status == CalibrationConfigurationJob::Status::Completed) {
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ebaccd2ef..2e74037d1 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -142,7 +142,7 @@ constexpr int default_mouse_timeout = 2500;
142/** 142/**
143 * "Callouts" are one-time instructional messages shown to the user. In the config settings, there 143 * "Callouts" are one-time instructional messages shown to the user. In the config settings, there
144 * is a bitfield "callout_flags" options, used to track if a message has already been shown to the 144 * is a bitfield "callout_flags" options, used to track if a message has already been shown to the
145 * user. This is 32-bits - if we have more than 32 callouts, we should retire and recyle old ones. 145 * user. This is 32-bits - if we have more than 32 callouts, we should retire and recycle old ones.
146 */ 146 */
147enum class CalloutFlag : uint32_t { 147enum class CalloutFlag : uint32_t {
148 Telemetry = 0x1, 148 Telemetry = 0x1,
@@ -616,9 +616,8 @@ void GMainWindow::InitializeWidgets() {
616 if (emulation_running) { 616 if (emulation_running) {
617 return; 617 return;
618 } 618 }
619 const bool is_async = !Settings::values.use_asynchronous_gpu_emulation.GetValue() || 619 Settings::values.use_asynchronous_gpu_emulation.SetValue(
620 Settings::values.use_multi_core.GetValue(); 620 !Settings::values.use_asynchronous_gpu_emulation.GetValue());
621 Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async);
622 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); 621 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
623 Settings::Apply(Core::System::GetInstance()); 622 Settings::Apply(Core::System::GetInstance());
624 }); 623 });
@@ -635,16 +634,13 @@ void GMainWindow::InitializeWidgets() {
635 return; 634 return;
636 } 635 }
637 Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue()); 636 Settings::values.use_multi_core.SetValue(!Settings::values.use_multi_core.GetValue());
638 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue() ||
639 Settings::values.use_multi_core.GetValue();
640 Settings::values.use_asynchronous_gpu_emulation.SetValue(is_async);
641 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
642 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); 637 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
643 Settings::Apply(Core::System::GetInstance()); 638 Settings::Apply(Core::System::GetInstance());
644 }); 639 });
645 multicore_status_button->setText(tr("MULTICORE")); 640 multicore_status_button->setText(tr("MULTICORE"));
646 multicore_status_button->setCheckable(true); 641 multicore_status_button->setCheckable(true);
647 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); 642 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
643
648 statusBar()->insertPermanentWidget(0, multicore_status_button); 644 statusBar()->insertPermanentWidget(0, multicore_status_button);
649 statusBar()->insertPermanentWidget(0, async_status_button); 645 statusBar()->insertPermanentWidget(0, async_status_button);
650 646
@@ -1085,20 +1081,24 @@ bool GMainWindow::LoadROM(const QString& filename, std::size_t program_index) {
1085 break; 1081 break;
1086 1082
1087 default: 1083 default:
1088 if (static_cast<u32>(result) > 1084 if (result > Core::System::ResultStatus::ErrorLoader) {
1089 static_cast<u32>(Core::System::ResultStatus::ErrorLoader)) {
1090 const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); 1085 const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
1091 const u16 error_id = static_cast<u16>(result) - loader_id; 1086 const u16 error_id = static_cast<u16>(result) - loader_id;
1092 const std::string error_code = fmt::format("({:04X}-{:04X})", loader_id, error_id); 1087 const std::string error_code = fmt::format("({:04X}-{:04X})", loader_id, error_id);
1093 LOG_CRITICAL(Frontend, "Failed to load ROM! {}", error_code); 1088 LOG_CRITICAL(Frontend, "Failed to load ROM! {}", error_code);
1094 QMessageBox::critical( 1089
1095 this, 1090 const auto title =
1096 tr("Error while loading ROM! ").append(QString::fromStdString(error_code)), 1091 tr("Error while loading ROM! %1", "%1 signifies a numeric error code.")
1097 QString::fromStdString(fmt::format( 1092 .arg(QString::fromStdString(error_code));
1098 "{}<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the " 1093 const auto description =
1099 "yuzu quickstart guide</a> to redump your files.<br>You can refer " 1094 tr("%1<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the "
1100 "to the yuzu wiki</a> or the yuzu Discord</a> for help.", 1095 "yuzu quickstart guide</a> to redump your files.<br>You can refer "
1101 static_cast<Loader::ResultStatus>(error_id)))); 1096 "to the yuzu wiki</a> or the yuzu Discord</a> for help.",
1097 "%1 signifies an error string.")
1098 .arg(QString::fromStdString(
1099 GetResultStatusString(static_cast<Loader::ResultStatus>(error_id))));
1100
1101 QMessageBox::critical(this, title, description);
1102 } else { 1102 } else {
1103 QMessageBox::critical( 1103 QMessageBox::critical(
1104 this, tr("Error while loading ROM!"), 1104 this, tr("Error while loading ROM!"),
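
The message-box hunk above replaces QString::append with Qt's placeholder form: the error code and the loader description are injected through .arg(), and each tr() call now carries a disambiguation comment that is shown to translators. A compilable sketch of the pattern (the error code passed in below is a made-up example):

    #include <QApplication>
    #include <QMessageBox>
    #include <QObject>
    #include <QString>

    // %1 is substituted via .arg(); tr()'s second argument never appears in
    // the UI, it is a hint shown to translators.
    static QString MakeLoadErrorTitle(const QString& error_code) {
        return QObject::tr("Error while loading ROM! %1",
                           "%1 signifies a numeric error code.")
            .arg(error_code);
    }

    int main(int argc, char* argv[]) {
        QApplication app(argc, argv);
        QMessageBox::critical(nullptr,
                              MakeLoadErrorTitle(QStringLiteral("(0005-0020)")),
                              QObject::tr("See the log for details."));
        return 0;
    }
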
@@ -1170,6 +1170,7 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
1170 [this](std::size_t program_index) { render_window->ExecuteProgram(program_index); }); 1170 [this](std::size_t program_index) { render_window->ExecuteProgram(program_index); });
1171 1171
1172 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); 1172 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame);
1173 connect(render_window, &GRenderWindow::MouseActivity, this, &GMainWindow::OnMouseActivity);
1173 // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views 1174 // BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views
1174 // before the CPU continues 1175 // before the CPU continues
1175 connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget, 1176 connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget,
@@ -1193,8 +1194,8 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index) {
1193 1194
1194 if (UISettings::values.hide_mouse) { 1195 if (UISettings::values.hide_mouse) {
1195 mouse_hide_timer.start(); 1196 mouse_hide_timer.start();
1196 setMouseTracking(true); 1197 render_window->installEventFilter(render_window);
1197 ui.centralwidget->setMouseTracking(true); 1198 render_window->setAttribute(Qt::WA_Hover, true);
1198 } 1199 }
1199 1200
1200 std::string title_name; 1201 std::string title_name;
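
Instead of enabling setMouseTracking() on the main window and its central widget, this change installs the render window as its own event filter and sets Qt::WA_Hover, so Qt delivers hover events without mouse tracking. The filter below is an assumed shape built around the MouseActivity signal connected in BootGame above; a std::function stands in for the Qt signal so the sketch builds without moc:

    #include <QEvent>
    #include <QWidget>
    #include <functional>

    class RenderWindow : public QWidget {
    public:
        std::function<void()> on_mouse_activity; // stand-in for MouseActivity()

        RenderWindow() {
            installEventFilter(this);         // filter our own events
            setAttribute(Qt::WA_Hover, true); // hover events, no mouse tracking
        }

    protected:
        bool eventFilter(QObject* object, QEvent* event) override {
            switch (event->type()) {
            case QEvent::HoverMove:
            case QEvent::MouseButtonPress:
                if (on_mouse_activity) {
                    on_mouse_activity(); // the main window re-shows the cursor
                }
                break;
            default:
                break;
            }
            return QWidget::eventFilter(object, event);
        }
    };
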
@@ -1271,8 +1272,8 @@ void GMainWindow::ShutdownGame() {
1271 } 1272 }
1272 game_list->SetFilterFocus(); 1273 game_list->SetFilterFocus();
1273 1274
1274 setMouseTracking(false); 1275 render_window->removeEventFilter(render_window);
1275 ui.centralwidget->setMouseTracking(false); 1276 render_window->setAttribute(Qt::WA_Hover, false);
1276 1277
1277 UpdateWindowTitle(); 1278 UpdateWindowTitle();
1278 1279
@@ -2353,12 +2354,12 @@ void GMainWindow::OnConfigure() {
2353 config->Save(); 2354 config->Save();
2354 2355
2355 if (UISettings::values.hide_mouse && emulation_running) { 2356 if (UISettings::values.hide_mouse && emulation_running) {
2356 setMouseTracking(true); 2357 render_window->installEventFilter(render_window);
2357 ui.centralwidget->setMouseTracking(true); 2358 render_window->setAttribute(Qt::WA_Hover, true);
2358 mouse_hide_timer.start(); 2359 mouse_hide_timer.start();
2359 } else { 2360 } else {
2360 setMouseTracking(false); 2361 render_window->removeEventFilter(render_window);
2361 ui.centralwidget->setMouseTracking(false); 2362 render_window->setAttribute(Qt::WA_Hover, false);
2362 } 2363 }
2363 2364
2364 UpdateStatusButtons(); 2365 UpdateStatusButtons();
@@ -2569,9 +2570,6 @@ void GMainWindow::UpdateStatusBar() {
2569void GMainWindow::UpdateStatusButtons() { 2570void GMainWindow::UpdateStatusButtons() {
2570 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue()); 2571 dock_status_button->setChecked(Settings::values.use_docked_mode.GetValue());
2571 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue()); 2572 multicore_status_button->setChecked(Settings::values.use_multi_core.GetValue());
2572 Settings::values.use_asynchronous_gpu_emulation.SetValue(
2573 Settings::values.use_asynchronous_gpu_emulation.GetValue() ||
2574 Settings::values.use_multi_core.GetValue());
2575 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue()); 2573 async_status_button->setChecked(Settings::values.use_asynchronous_gpu_emulation.GetValue());
2576 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() == 2574 renderer_status_button->setChecked(Settings::values.renderer_backend.GetValue() ==
2577 Settings::RendererBackend::Vulkan); 2575 Settings::RendererBackend::Vulkan);
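
The three deleted lines were a hidden write: every status-bar refresh recomputed the async flag as async OR multi_core and stored it back, silently overriding the user's choice. What remains is a pure settings-to-UI mirror. A self-contained sketch of that invariant, with stubbed stand-ins for the setting and the Qt button:

    #include <cassert>

    struct Setting {
        bool value = false;
        bool GetValue() const { return value; }
        void SetValue(bool v) { value = v; }
    };

    struct StatusButton {
        bool checked = false;
        void setChecked(bool on) { checked = on; }
    };

    // Post-change shape: reads settings, writes only the UI.
    void UpdateStatusButtons(StatusButton& async_button, const Setting& async) {
        async_button.setChecked(async.GetValue());
    }

    int main() {
        Setting async;      // the user turned async off
        StatusButton button;
        UpdateStatusButtons(button, async);
        assert(!async.GetValue()); // a refresh no longer flips it back on
        assert(!button.checked);
    }
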
@@ -2601,21 +2599,17 @@ void GMainWindow::HideMouseCursor() {
2601 ShowMouseCursor(); 2599 ShowMouseCursor();
2602 return; 2600 return;
2603 } 2601 }
2604 setCursor(QCursor(Qt::BlankCursor)); 2602 render_window->setCursor(QCursor(Qt::BlankCursor));
2605} 2603}
2606 2604
2607void GMainWindow::ShowMouseCursor() { 2605void GMainWindow::ShowMouseCursor() {
2608 unsetCursor(); 2606 render_window->unsetCursor();
2609 if (emu_thread != nullptr && UISettings::values.hide_mouse) { 2607 if (emu_thread != nullptr && UISettings::values.hide_mouse) {
2610 mouse_hide_timer.start(); 2608 mouse_hide_timer.start();
2611 } 2609 }
2612} 2610}
2613 2611
2614void GMainWindow::mouseMoveEvent(QMouseEvent* event) { 2612void GMainWindow::OnMouseActivity() {
2615 ShowMouseCursor();
2616}
2617
2618void GMainWindow::mousePressEvent(QMouseEvent* event) {
2619 ShowMouseCursor(); 2613 ShowMouseCursor();
2620} 2614}
2621 2615
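
HideMouseCursor() and ShowMouseCursor() now set and unset the cursor on render_window rather than on the whole main window, which plausibly keeps the pointer visible over menus and dialogs while still hiding it over the emulation view. Reduced to its essentials (render_window is any child QWidget):

    #include <QCursor>
    #include <QWidget>

    void HideCursorOver(QWidget* render_window) {
        // Only this widget is affected; the rest of the window keeps its cursor.
        render_window->setCursor(QCursor(Qt::BlankCursor));
    }

    void ShowCursorOver(QWidget* render_window) {
        render_window->unsetCursor(); // revert to the inherited cursor
    }
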
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index ea6d2c30d..31788ea62 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -248,6 +248,7 @@ private slots:
248 void OnCoreError(Core::System::ResultStatus, std::string); 248 void OnCoreError(Core::System::ResultStatus, std::string);
249 void OnReinitializeKeys(ReinitializeKeyBehavior behavior); 249 void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
250 void OnLanguageChanged(const QString& locale); 250 void OnLanguageChanged(const QString& locale);
251 void OnMouseActivity();
251 252
252private: 253private:
253 void RemoveBaseContent(u64 program_id, const QString& entry_type); 254 void RemoveBaseContent(u64 program_id, const QString& entry_type);
@@ -335,6 +336,4 @@ protected:
335 void dropEvent(QDropEvent* event) override; 336 void dropEvent(QDropEvent* event) override;
336 void dragEnterEvent(QDragEnterEvent* event) override; 337 void dragEnterEvent(QDragEnterEvent* event) override;
337 void dragMoveEvent(QDragMoveEvent* event) override; 338 void dragMoveEvent(QDragMoveEvent* event) override;
338 void mouseMoveEvent(QMouseEvent* event) override;
339 void mousePressEvent(QMouseEvent* event) override;
340}; 339};
diff --git a/src/yuzu/util/url_request_interceptor.cpp b/src/yuzu/util/url_request_interceptor.cpp
index 2d491d8c0..b637e771e 100644
--- a/src/yuzu/util/url_request_interceptor.cpp
+++ b/src/yuzu/util/url_request_interceptor.cpp
@@ -22,6 +22,8 @@ void UrlRequestInterceptor::interceptRequest(QWebEngineUrlRequestInfo& info) {
22 case QWebEngineUrlRequestInfo::ResourceTypeXhr: 22 case QWebEngineUrlRequestInfo::ResourceTypeXhr:
23 emit FrameChanged(); 23 emit FrameChanged();
24 break; 24 break;
25 default:
26 break;
25 } 27 }
26} 28}
27 29
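
The switch in interceptRequest() names only a few enumerators of QWebEngineUrlRequestInfo::ResourceType, and compilers with -Wswitch enabled warn about every unhandled enumerator when no default is present. The added do-nothing default records that the remaining resource types are deliberately ignored. A stand-alone illustration with a stand-in enum:

    #include <iostream>

    enum class ResourceType { MainFrame, SubFrame, Stylesheet, Xhr };

    bool TriggersFrameChanged(ResourceType type) {
        switch (type) {
        case ResourceType::MainFrame:
        case ResourceType::Xhr:
            return true;
        default:
            return false; // other resource types are intentionally ignored
        }
    }

    int main() {
        std::cout << TriggersFrameChanged(ResourceType::Stylesheet) << '\n'; // 0
    }
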
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 38075c345..41ef6f6b8 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -344,7 +344,7 @@ void Config::ReadValues() {
344 344
345 // System 345 // System
346 Settings::values.use_docked_mode.SetValue( 346 Settings::values.use_docked_mode.SetValue(
347 sdl2_config->GetBoolean("System", "use_docked_mode", false)); 347 sdl2_config->GetBoolean("System", "use_docked_mode", true));
348 348
349 Settings::values.current_user = std::clamp<int>( 349 Settings::values.current_user = std::clamp<int>(
350 sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1); 350 sdl2_config->GetInteger("System", "current_user", 0), 0, Service::Account::MAX_USERS - 1);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 2d4b98d9a..3ee0e037d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -274,7 +274,7 @@ gamecard_path =
274 274
275[System] 275[System]
276# Whether the system is docked 276# Whether the system is docked
277# 1: Yes, 0 (default): No 277# 1 (default): Yes, 0: No
278use_docked_mode = 278use_docked_mode =
279 279
280# Allow the use of NFC in games 280# Allow the use of NFC in games
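
The config.cpp and default_ini.h hunks flip the docked-mode default to on, and they have to move together: the fallback passed to GetBoolean() and the "1 (default): Yes" comment in the generated INI describe the same behaviour, so changing one without the other would mislead users. A sketch with a stubbed inih-style reader (the stub always reports the key as absent, so only the default matters):

    #include <string>

    struct IniReader {
        bool GetBoolean(const std::string& section, const std::string& name,
                        bool default_value) const {
            (void)section;
            (void)name;
            return default_value; // stub: pretend the key is missing/empty
        }
    };

    int main() {
        IniReader config;
        // With the key left empty, docked mode now comes out enabled.
        const bool docked = config.GetBoolean("System", "use_docked_mode", true);
        return docked ? 0 : 1;
    }
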
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 2497c71ae..4faf62ede 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -95,8 +95,6 @@ int main(int argc, char** argv) {
95 int option_index = 0; 95 int option_index = 0;
96 96
97 InitializeLogging(); 97 InitializeLogging();
98
99 char* endarg;
100#ifdef _WIN32 98#ifdef _WIN32
101 int argc_w; 99 int argc_w;
102 auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w); 100 auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w);
@@ -202,7 +200,7 @@ int main(int argc, char** argv) {
202 const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); 200 const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
203 const u16 error_id = static_cast<u16>(load_result) - loader_id; 201 const u16 error_id = static_cast<u16>(load_result) - loader_id;
204 LOG_CRITICAL(Frontend, 202 LOG_CRITICAL(Frontend,
205 "While attempting to load the ROM requested, an error occured. Please " 203 "While attempting to load the ROM requested, an error occurred. Please "
206 "refer to the yuzu wiki for more information or the yuzu discord for " 204 "refer to the yuzu wiki for more information or the yuzu discord for "
207 "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", 205 "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}",
208 loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); 206 loader_id, error_id, static_cast<Loader::ResultStatus>(error_id));
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 91684e96e..0aa143e1f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -86,7 +86,7 @@ void Config::ReadValues() {
86 Settings::values.touchscreen.diameter_y = 15; 86 Settings::values.touchscreen.diameter_y = 15;
87 87
88 Settings::values.use_docked_mode.SetValue( 88 Settings::values.use_docked_mode.SetValue(
89 sdl2_config->GetBoolean("Controls", "use_docked_mode", false)); 89 sdl2_config->GetBoolean("Controls", "use_docked_mode", true));
90 90
91 // Data Storage 91 // Data Storage
92 Settings::values.use_virtual_sd = 92 Settings::values.use_virtual_sd =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 3eb64e9d7..779c3791b 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -116,7 +116,7 @@ use_virtual_sd =
116 116
117[System] 117[System]
118# Whether the system is docked 118# Whether the system is docked
119# 1: Yes, 0 (default): No 119# 1 (default): Yes, 0: No
120use_docked_mode = 120use_docked_mode =
121 121
122# Allow the use of NFC in games 122# Allow the use of NFC in games
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index 6435ffabb..09cf2ad77 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -242,7 +242,7 @@ int main(int argc, char** argv) {
242 const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader); 242 const u16 loader_id = static_cast<u16>(Core::System::ResultStatus::ErrorLoader);
243 const u16 error_id = static_cast<u16>(load_result) - loader_id; 243 const u16 error_id = static_cast<u16>(load_result) - loader_id;
244 LOG_CRITICAL(Frontend, 244 LOG_CRITICAL(Frontend,
245 "While attempting to load the ROM requested, an error occured. Please " 245 "While attempting to load the ROM requested, an error occurred. Please "
246 "refer to the yuzu wiki for more information or the yuzu discord for " 246 "refer to the yuzu wiki for more information or the yuzu discord for "
247 "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}", 247 "additional help.\n\nError Code: {:04X}-{:04X}\nError Description: {}",
248 loader_id, error_id, static_cast<Loader::ResultStatus>(error_id)); 248 loader_id, error_id, static_cast<Loader::ResultStatus>(error_id));
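
The error codes logged here, in yuzu.cpp above, and in the GUI frontend are all derived the same way: a Core::System::ResultStatus beyond ErrorLoader encodes a Loader::ResultStatus as an offset, and the printable XXXX-XXXX code is the base and that offset in hex. A compact sketch of the decomposition, with made-up enum values standing in for the real ones:

    #include <cstdint>
    #include <string>
    #include <fmt/format.h> // fmt is the formatting library used throughout

    using u16 = std::uint16_t;

    // Stand-in enums; the real values live in Core::System::ResultStatus
    // and Loader::ResultStatus.
    enum class SystemResult : u16 { Success = 0, ErrorLoader = 5 };
    enum class LoaderResult : u16 { ErrorInvalidFormat = 12 };

    std::string MakeErrorCode(SystemResult result) {
        const u16 loader_id = static_cast<u16>(SystemResult::ErrorLoader);
        const u16 error_id = static_cast<u16>(result) - loader_id;
        return fmt::format("({:04X}-{:04X})", loader_id, error_id);
    }

    int main() {
        // ErrorLoader + 12 decodes back to loader error 12.
        const auto combined = static_cast<SystemResult>(
            static_cast<u16>(SystemResult::ErrorLoader) +
            static_cast<u16>(LoaderResult::ErrorInvalidFormat));
        fmt::print("{}\n", MakeErrorCode(combined)); // (0005-000C)
    }
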