diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
100 files changed, 2372 insertions, 640 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2afecc55090f..260e32ef7bae 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -80,7 +80,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 02f4c6f9d4f6..576067d66bb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, { struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_ras *con; int r; if (reset_device_list == NULL) @@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, */ amdgpu_register_gpu_instance(tmp_adev); - /* Resume RAS */ + /* Resume RAS, ecc_irq */ + con = amdgpu_ras_get_context(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev) && con) { + if (tmp_adev->sdma.ras && + tmp_adev->sdma.ras->ras_block.ras_late_init) { + r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->sdma.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + + if (tmp_adev->gfx.ras && + tmp_adev->gfx.ras->ras_block.ras_late_init) { + r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->gfx.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + } + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index afec09930efa..3d8a48f46b01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -109,6 +109,8 @@ #include "amdgpu_mca.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_seq64.h" +#include "amdgpu_reg_state.h" #define MAX_GPU_INSTANCE 64 @@ -248,6 +250,9 @@ extern int amdgpu_umsch_mm; extern int amdgpu_seamless; extern int amdgpu_user_partt_mode; +extern int amdgpu_agp; + +extern int amdgpu_wbrf; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) @@ -467,6 +472,7 @@ struct amdgpu_fpriv { struct amdgpu_vm vm; struct amdgpu_bo_va *prt_va; struct amdgpu_bo_va *csa_va; + struct amdgpu_bo_va *seq64_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; @@ -505,6 +511,31 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; +/** + * enum amd_reset_method - Methods for resetting AMD GPU devices + * + * @AMD_RESET_METHOD_NONE: The device will not be reset. + * @AMD_RESET_LEGACY: Method reserved for SI, CIK and VI ASICs. + * @AMD_RESET_MODE0: Reset the entire ASIC. Not currently available for the + * any device. + * @AMD_RESET_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.) + * individually. Suitable only for some discrete GPU, not + * available for all ASICs. + * @AMD_RESET_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs + * are reset depends on the ASIC. Notably doesn't reset IPs + * shared with the CPU on APUs or the memory controllers (so + * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card + * but without powering off the PCI bus. Suitable only for + * discrete GPUs. + * @AMD_RESET_PCI: Does a full bus reset using core Linux subsystem PCI reset + * and does a secondary bus reset or FLR, depending on what the + * underlying hardware supports. + * + * Methods available for AMD GPU driver for resetting the device. Not all + * methods are suitable for every device. User can override the method using + * module parameter `reset_method`. + */ enum amd_reset_method { AMD_RESET_METHOD_NONE = -1, AMD_RESET_METHOD_LEGACY = 0, @@ -584,6 +615,10 @@ struct amdgpu_asic_funcs { const struct amdgpu_video_codecs **codecs); /* encode "> 32bits" smn addressing */ u64 (*encode_ext_smn_addressing)(int ext_id); + + ssize_t (*get_reg_state)(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size); }; /* @@ -756,6 +791,7 @@ struct amdgpu_mqd_prop { uint64_t eop_gpu_addr; uint32_t hqd_pipe_priority; uint32_t hqd_queue_priority; + bool allow_tunneling; bool hqd_active; }; @@ -985,6 +1021,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* for userq and VM fences */ + struct amdgpu_seq64 seq64; + /* KFD */ struct amdgpu_kfd_dev kfd; @@ -1105,6 +1144,7 @@ struct amdgpu_device { bool debug_vm; bool debug_largebar; bool debug_disable_soft_recovery; + bool debug_use_vram_fw_buf; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b8412202a1b0..77e263660288 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -138,10 +138,14 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, &reset_context); } +static const struct drm_client_funcs kfd_client_funcs = { + .unregister = drm_client_release, +}; void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; int last_valid_bit; + int ret; amdgpu_amdkfd_gpuvm_init_mem_limits(); @@ -160,6 +164,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .enable_mes = adev->enable_mes, }; + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", ret); + return; + } + /* this is going to have a few of the MSBs set that we need to * clear */ @@ -198,6 +208,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); + if (adev->kfd.init_complete) + drm_client_register(&adev->kfd.client); + else + drm_client_release(&adev->kfd.client); amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -547,7 +561,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *adev = dst, *peer_adev; int num_links; - if (adev->asic_type != CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) return 0; if (src) @@ -684,10 +698,8 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; - /* Temporary workaround to fix issues observed in some - * compute applications when GFXOFF is enabled on GFX11. - */ - if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) { + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && @@ -710,35 +722,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, - uint16_t vmid) -{ - if (adev->family == AMDGPU_FAMILY_AI) { - int i; - - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - } else { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); - } - - return 0; -} - -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type, - uint32_t inst) -{ - bool all_hub = false; - - if (adev->family == AMDGPU_FAMILY_AI || - adev->family == AMDGPU_FAMILY_RV) - all_hub = true; - - return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); -} - bool amdgpu_amdkfd_have_ |