diff options
| author | Dave Airlie <airlied@redhat.com> | 2021-02-05 09:38:19 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2021-02-05 09:38:23 +1000 |
| commit | c5cb0db5fcce640574f5b73b2b4030b5b60f3700 (patch) | |
| tree | 8396d017fae43a2c491e310e0254929e56d62e14 | |
| parent | 54c820d05ee8156b1379ca0efd011b77e23cabb2 (diff) | |
| parent | c915ef890d5dc79f483e1ca3b3a5b5f1a170690c (diff) | |
| download | linux-c5cb0db5fcce640574f5b73b2b4030b5b60f3700.tar.gz linux-c5cb0db5fcce640574f5b73b2b4030b5b60f3700.tar.bz2 linux-c5cb0db5fcce640574f5b73b2b4030b5b60f3700.zip | |
Merge tag 'amd-drm-next-5.12-2021-02-03' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.12-2021-02-03:
amdgpu:
- Display fixes and cleanups
- Vangogh fixes
- Fix possible race when there are timeouts on two rings
- SR-IOV fixes
- Add missing license
- DCE 10/12 bpc fixes
- Display MALL fixes
- Fix SMU user preference settings persistence
- Fix retry in gem allocate
- Add new PCI DID
- Fix for manual fan speed control on cards where it was problematic
- Fix regression in pinning GTT
- Misc display fixes
- Misc code cleanups
amdkfd:
- Fix config handling
- Fix regression in buffer free
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210204045717.3823-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
80 files changed, 1623 insertions, 872 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index db96d69eb45e..c5343a5eecbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -47,12 +47,8 @@ int amdgpu_amdkfd_init(void) amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; amdgpu_amdkfd_total_mem_size *= si.mem_unit; -#ifdef CONFIG_HSA_AMD ret = kgd2kfd_init(); amdgpu_amdkfd_gpuvm_init_mem_limits(); -#else - ret = -ENOENT; -#endif kfd_initialized = !ret; return ret; @@ -696,86 +692,3 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) return adev->have_atomics_support; } - -#ifndef CONFIG_HSA_AMD -bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) -{ - return false; -} - -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) -{ -} - -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) -{ - return 0; -} - -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ -} - -struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) -{ - return NULL; -} - -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) -{ - return 0; -} - -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - unsigned int asic_type, bool vf) -{ - return NULL; -} - -bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, - const struct kgd2kfd_shared_resources *gpu_resources) -{ - return false; -} - -void kgd2kfd_device_exit(struct kfd_dev *kfd) -{ -} - -void kgd2kfd_exit(void) -{ -} - -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) -{ -} - -int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) -{ - return 0; -} - -int kgd2kfd_pre_reset(struct kfd_dev *kfd) -{ - return 0; -} - -int kgd2kfd_post_reset(struct kfd_dev *kfd) -{ - return 0; -} - -void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) -{ -} - -void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) -{ -} - -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) -{ -} -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ea391ca7f2f1..a81d9cacf9b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -94,11 +94,6 @@ enum kgd_engine_type { KGD_ENGINE_MAX }; -struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); -bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); -struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); struct amdkfd_process_info { /* List head of all VMs that belong to a KFD process */ @@ -132,8 +127,6 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); - -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); @@ -153,6 +146,38 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); +#if IS_ENABLED(CONFIG_HSA_AMD) +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); +#else +static inline +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + return false; +} + +static inline +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + return NULL; +} + +static inline +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +{ + return 0; +} + +static inline +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +{ + return 0; +} +#endif /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -215,8 +240,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, void **vm, void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); @@ -236,23 +259,43 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, struct kgd_mem *mem, void **kptr, uint64_t *size); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); - int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); - int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dmabuf, uint64_t va, void *vm, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); - -void amdgpu_amdkfd_gpuvm_init_mem_limits(void); -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); - int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, struct tile_config *config); +#if IS_ENABLED(CONFIG_HSA_AMD) +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm); +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +#else +static inline +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ +} +static inline +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ +} + +static inline +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) +{ +} +#endif /* KGD2KFD callbacks */ +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence); +#if IS_ENABLED(CONFIG_HSA_AMD) int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, @@ -266,11 +309,68 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); -int kgd2kfd_quiesce_mm(struct mm_struct *mm); -int kgd2kfd_resume_mm(struct mm_struct *mm); -int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, - struct dma_fence *fence); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); +#else +static inline int kgd2kfd_init(void) +{ + return -ENOENT; +} +static inline void kgd2kfd_exit(void) +{ +} + +static inline +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, + unsigned int asic_type, bool vf) +{ + return NULL; +} + +static inline +bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, + const struct kgd2kfd_shared_resources *gpu_resources) +{ + return false; +} + +static inline void kgd2kfd_device_exit(struct kfd_dev *kfd) +{ +} + +static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +{ +} + +static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) +{ + return 0; +} + +static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kgd2kfd_post_reset(struct kfd_dev *kfd) +{ + return 0; +} + +static inline +void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) +{ +} + +static inline +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ +} + +static inline +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +{ +} +#endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0849b68e784f..ac0a432a9bf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -26,6 +26,7 @@ #include <linux/sched/task.h> #include "amdgpu_object.h" +#include "amdgpu_gem.h" #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" @@ -1152,7 +1153,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; - struct amdgpu_bo_param bp; + struct drm_gem_object *gobj; u32 domain, alloc_domain; u64 alloc_flags; int ret; @@ -1220,19 +1221,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", va, size, domain_string(alloc_domain)); - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.byte_align = 1; - bp.domain = alloc_domain; - bp.flags = alloc_flags; - bp.type = bo_type; - bp.resv = NULL; - ret = amdgpu_bo_create(adev, &bp, &bo); + ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags, + bo_type, NULL, &gobj); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", - domain_string(alloc_domain), ret); + domain_string(alloc_domain), ret); goto err_bo_create; } + bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; bo->tbo.ttm->sg = sg; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 00b6ba5740f3..51bea409e513 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4211,7 +4211,6 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: break; default: goto disabled; @@ -4461,6 +4460,46 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) up_write(&adev->reset_sem); } +/* + * to lockup a list of amdgpu devices in a hive safely, if not a hive + * with multiple nodes, it will be similar as amdgpu_device_lock_adev. + * + * unlock won't require roll back. + */ +static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (!hive) { + dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes"); + return -ENODEV; + } + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + if (!amdgpu_device_lock_adev(tmp_adev, hive)) + goto roll_back; + } + } else if (!amdgpu_device_lock_adev(adev, hive)) + return -EAGAIN; + + return 0; +roll_back: + if (!list_is_fi |
