Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
57 files changed, 2318 insertions, 921 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..a87e42c2c8dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -88,7 +88,8 @@ amdgpu-y += \
 	gmc_v8_0.o \
 	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
 	gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
-	mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o
+	mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
+	mmhub_v3_0_1.o
 
 # add UMC block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 30ce6bb6fa77..fb9399a999ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -223,6 +223,9 @@ static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
 static const bool __maybe_unused debug_evictions; /* = false */
 static const bool __maybe_unused no_system_mem_limit;
 #endif
+#ifdef CONFIG_HSA_AMD_P2P
+extern bool pcie_p2p;
+#endif
 
 extern int amdgpu_tmz;
 extern int amdgpu_reset_method;
@@ -274,7 +277,7 @@ extern int amdgpu_vcnfw_log;
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
 
-/* smasrt shift bias level limits */
+/* smart shift bias level limits */
 #define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
 #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
 
@@ -667,6 +670,7 @@ enum amd_hw_ip_block_type {
 	RSMU_HWIP,
 	XGMI_HWIP,
 	DCI_HWIP,
+	PCIE_HWIP,
 	MAX_HWIP
 };
 
@@ -1044,10 +1048,18 @@ struct amdgpu_device {
 
 	/* reset dump register */
 	uint32_t			*reset_dump_reg_list;
+	uint32_t			*reset_dump_reg_value;
 	int				num_regs;
+#ifdef CONFIG_DEV_COREDUMP
+	struct amdgpu_task_info		reset_task_info;
+	bool				reset_vram_lost;
+	struct timespec64		reset_time;
+#endif
 
 	bool				scpm_enabled;
 	uint32_t			scpm_status;
+
+	struct work_struct		reset_work;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1242,7 +1254,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job* job);
-int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job);
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
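The new CONFIG_DEV_COREDUMP fields in struct amdgpu_device (reset_task_info, reset_vram_lost, reset_time) carry reset context for the kernel's devcoredump facility. A minimal sketch of how such state can be surfaced; the function names and report format below are illustrative of the pattern, not a copy of the patch's implementation:

#include <linux/devcoredump.h>
#include <linux/module.h>

/* Sketch: serialize the reset context captured in struct amdgpu_device. */
static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
				       size_t count, void *data, size_t datalen)
{
	struct amdgpu_device *adev = data;
	ssize_t n = 0;

	if (offset)		/* single-shot dump keeps the sketch simple */
		return 0;

	n += scnprintf(buffer + n, count - n, "**** AMDGPU Device Coredump ****\n");
	n += scnprintf(buffer + n, count - n, "time: %lld.%09ld\n",
		       (long long)adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
	if (adev->reset_task_info.pid)
		n += scnprintf(buffer + n, count - n, "process_name: %s PID: %d\n",
			       adev->reset_task_info.process_name,
			       adev->reset_task_info.pid);
	if (adev->reset_vram_lost)
		n += scnprintf(buffer + n, count - n, "VRAM is lost due to GPU reset!\n");
	return n;
}

static void amdgpu_devcoredump_free(void *data)
{
	/* 'data' is the adev itself here, so there is nothing to free */
}

/* Called from the reset path once reset_time/reset_task_info are filled in;
 * the dump then appears under /sys/class/devcoredump/. */
static void amdgpu_reset_capture_coredump(struct amdgpu_device *adev)
{
	dev_coredumpm(adev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
}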
-#endif
 		}
 
 		if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
@@ -849,7 +845,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
 
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 	if (atif->notifications.brightness_change) {
 		if (amdgpu_device_has_dc_support(adev)) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -876,7 +871,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 			}
 		}
 	}
-#endif
 
 	adev->acpi_nb.notifier_call = amdgpu_acpi_event;
 	register_acpi_notifier(&adev->acpi_nb);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 1f8161cd507f..567597469a8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -33,6 +33,7 @@
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_ras.h"
 #include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
 
 /* Total memory size in system memory and all GPU VRAM. Used to
  * estimate worst case amount of memory to reserve for page tables
@@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 	}
 }
 
+
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+						  kfd.reset_work);
+
+	amdgpu_device_gpu_recover(adev, NULL);
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
 						adev_to_drm(adev), &gpu_resources);
+
+		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
 	}
 }
 
@@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
 void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
 {
 	if (amdgpu_device_should_recover_gpu(adev))
-		amdgpu_device_gpu_recover(adev, NULL);
+		amdgpu_reset_domain_schedule(adev->reset_domain,
+					     &adev->kfd.reset_work);
 }
 
 int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
@@ -671,6 +684,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 		goto err_ib_sched;
 	}
 
+	/* Drop the initial kref_init count (see drm_sched_main as example) */
+	dma_fence_put(f);
 	ret = dma_fence_wait(f, false);
 
 err_ib_sched:
@@ -714,7 +729,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 {
 	bool all_hub = false;
 
-	if (adev->family == AMDGPU_FAMILY_AI)
+	if (adev->family == AMDGPU_FAMILY_AI ||
+	    adev->family == AMDGPU_FAMILY_RV)
 		all_hub = true;
 
 	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
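The hunks above move KFD-initiated recovery onto the device's reset domain: the work handler recovers the adev via container_of(), and amdgpu_reset_domain_schedule() queues the work on the domain's own queue so reset requests from different sources funnel through one place instead of racing. A condensed sketch of that pattern with the KFD specifics stripped away; the names here are illustrative:

#include <linux/workqueue.h>

/* Sketch: one work item per reset source, all funneled through a single
 * ordered queue owned by the reset domain. 'my_dev' is illustrative. */
struct my_dev {
	struct workqueue_struct *reset_wq;	/* ordered: at most one active work */
	struct work_struct	 reset_work;
};

static void my_reset_work(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev, reset_work);

	/* actual recovery runs here, never concurrently with itself */
	(void)dev;
}

static int my_dev_init(struct my_dev *dev)
{
	dev->reset_wq = alloc_ordered_workqueue("my-reset", 0);
	if (!dev->reset_wq)
		return -ENOMEM;
	INIT_WORK(&dev->reset_work, my_reset_work);
	return 0;
}

/* Detection path: cheap to call, never blocks on the recovery itself */
static void my_dev_trigger_reset(struct my_dev *dev)
{
	queue_work(dev->reset_wq, &dev->reset_work);
}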
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f8b9f27adcf5..73bf8b5f2aa9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -48,6 +48,7 @@ enum kfd_mem_attachment_type {
 	KFD_MEM_ATT_SHARED,	/* Share kgd_mem->bo or another attachment's */
 	KFD_MEM_ATT_USERPTR,	/* SG bo to DMA map pages from a userptr bo */
 	KFD_MEM_ATT_DMABUF,	/* DMAbuf to DMA map TTM BOs */
+	KFD_MEM_ATT_SG		/* Tag to DMA map SG BOs */
 };
 
 struct kfd_mem_attachment {
@@ -96,6 +97,7 @@ struct amdgpu_kfd_dev {
 	struct kfd_dev *dev;
 	uint64_t vram_used;
 	bool init_complete;
+	struct work_struct reset_work;
 };
 
 enum kgd_engine_type {
@@ -266,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
 					void *drm_priv);
 uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		struct amdgpu_device *adev, uint64_t va, uint64_t size,
 		void *drm_priv, struct kgd_mem **mem,
@@ -279,10 +282,11 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
 		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
-		struct kgd_mem *mem, void **kptr, uint64_t *size);
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
-		struct kgd_mem *mem);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+		void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
+
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 					    struct dma_fence **ef);
@@ -332,7 +336,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 }
 #endif
 /* KGD2KFD callbacks */
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
 int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
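Note the simplified kernel-mapping helpers above: the struct amdgpu_device argument is gone, since it can be derived from the BO itself. A hedged usage sketch against the new prototypes; the caller function and 'mem' are illustrative, and error handling is trimmed:

/* Sketch: CPU-map a GTT BO through the reworked helpers. 'mem' is an
 * already-allocated struct kgd_mem. */
static int read_header_from_gtt_bo(struct kgd_mem *mem, uint32_t *out)
{
	void *kptr;
	uint64_t size;
	int r;

	r = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kptr, &size);
	if (r)
		return r;

	*out = *(uint32_t *)kptr;	/* CPU access through the kernel mapping */

	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
	return 0;
}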
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6b6d46e29e6e..2fcc6e079769 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -32,12 +32,19 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "kfd_smi_events.h"
 
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
 #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
 
+/*
+ * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_ALLOCATION_ALIGN (1 << 21)
+
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
@@ -108,7 +115,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
  * compromise that should work in most cases without reserving too
  * much memory for page tables unnecessarily (factor 16K, >> 14).
  */
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
 
 static size_t amdgpu_amdkfd_acc_size(uint64_t size)
 {
@@ -148,7 +155,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
-		vram_needed = size;
+
+		/*
+		 * Conservatively round up the allocation requirement to 2 MB
+		 * to avoid fragmentation caused by 4K allocations in the tail
+		 * 2M BO chunk.
+		 */
+		vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		system_mem_needed = acc_size + size;
 		ttm_mem_needed = acc_size;
@@ -173,7 +186,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 	     kfd_mem_limit.max_ttm_mem_limit) ||
 	    (adev->kfd.vram_used + vram_needed >
-	     adev->gmc.real_vram_size - reserved_for_pt)) {
+	     adev->gmc.real_vram_size -
+	     atomic64_read(&adev->vram_pin_size) -
+	     reserved_for_pt)) {
 		ret = -ENOMEM;
 		goto release;
 	}
@@ -205,7 +220,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
-		adev->kfd.vram_used -= size;
+		adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
 		kfd_mem_limit.ttm_mem_used -= acc_size;
@@ -241,6 +256,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 	kfree(bo->kfd_bo);
 }
 
+/**
+ * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
+ * about USERPTR or DOOREBELL or MMIO BO.
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ *	 in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+		 struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+	struct drm_gem_object *gem_obj;
+	int ret, align;
+
+	ret = amdgpu_bo_reserve(mem->bo, false);
+	if (ret)
+		return ret;
+
+	align = 1;
+	ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
+			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
+			ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+
+	amdgpu_bo_unreserve(mem->bo);
+
+	if (ret) {
+		pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+		return -EINVAL;
+	}
+
+	*bo_out = gem_to_amdgpu_bo(gem_obj);
+	(*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+	return ret;
+}
+
 /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
  *  reservation object.
  *
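The accounting change above charges every VRAM allocation as a whole number of 2 MiB chunks: ALIGN(size, VRAM_ALLOCATION_ALIGN) rounds up to the next multiple of 2^21 so that many 4K allocations cannot fragment the tail chunk of a BO. A standalone arithmetic check; ALIGN_UP re-implements the kernel's ALIGN() for power-of-two alignments:

#include <stdio.h>

/* Same rounding the kernel's ALIGN() performs for power-of-two alignments */
#define ALIGN_UP(x, a)        (((x) + (a) - 1) & ~((a) - 1))
#define VRAM_ALLOCATION_ALIGN (1ULL << 21)	/* 2 MiB, as in the patch */

int main(void)
{
	/* a 4 KiB request is accounted as one full 2 MiB chunk */
	printf("%llu\n", ALIGN_UP(4096ULL, VRAM_ALLOCATION_ALIGN));    /* 2097152 */
	/* 3 MiB straddles two chunks and is charged as 4 MiB */
	printf("%llu\n", ALIGN_UP(3ULL << 20, VRAM_ALLOCATION_ALIGN)); /* 4194304 */
	return 0;
}

The same rounded size must be subtracted again in unreserve_mem_limit(), as the hunk above does, or the bookkeeping would leak 2 MiB minus the true size on every free.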
@@ -446,6 +497,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 	return pte_flags;
 }
 
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+	if (!sg)
+		return NULL;
+	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+		kfree(sg);
+		return NULL;
+	}
+	sg_dma_address(sg->sgl) = addr;
+	sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+	sg->sgl->dma_length = size;
+#endif
+	return sg;
+}
+
 static int
 kfd_mem_dmamap_userptr(struct kgd_mem *mem,
 		       struct kfd_mem_attachment *attachment)
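The kfd_mem_dmamap_sg_bo() hunk below pairs dma_map_resource()/dma_unmap_resource() around exactly such a single-entry table. A reduced sketch of that map/unmap discipline for a physically contiguous MMIO or doorbell range; the function names, 'dma_dev', 'phys' and 'len' are illustrative:

#include <linux/dma-mapping.h>

/* Sketch: DMA-map a contiguous resource range for a peer device, to be
 * wrapped with create_sg_table() as the hunk below does. */
static int map_peer_resource(struct device *dma_dev, phys_addr_t phys,
			     size_t len, enum dma_data_direction dir,
			     dma_addr_t *out)
{
	dma_addr_t addr;

	addr = dma_map_resource(dma_dev, phys, len, dir,
				DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dma_dev, addr))
		return -ENOMEM;

	*out = addr;
	return 0;
}

static void unmap_peer_resource(struct device *dma_dev, dma_addr_t addr,
				size_t len, enum dma_data_direction dir)
{
	dma_unmap_resource(dma_dev, addr, len, dir, DMA_ATTR_SKIP_CPU_SYNC);
}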
@@ -510,6 +593,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
 	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it instead
+ * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without regard to
+ * device ownership. This is because access requests for MMIO go through PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ *   - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ *     in updating requesting device's page table
+ *   - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ *     accessible. This allows an update of requesting device's page table
+ *     with entries associated with DOOREBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *   - Mapping of DOORBELL or MMIO BO of same or peer device
+ *   - Validating an evicted DOOREBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+		     struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	enum dma_data_direction dir;
+	dma_addr_t dma_addr;
+	bool mmio;
+	int ret;
+
+	/* Expect SG Table of dmapmap BO to be NULL */
+	mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+	if (unlikely(ttm->sg)) {
+		pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+		return -EINVAL;
+	}
+
+	dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+			DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+	pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+	pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+	dma_addr = dma_map_resource(adev->dev, dma_addr,
+			mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	ret = dma_mapping_error(adev->dev, dma_addr);
+	if (unlikely(ret))
+		return ret;
+	pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+	ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+	if (unlikely(!ttm->sg)) {
+		ret = -ENOMEM;
+		goto unmap_sg;
+	}
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (unlikely(ret))
+		goto free_sg;
+
+	return ret;
+
+free_sg:
+	sg_free_table(ttm->sg);
+	kfree(ttm->sg);
+	ttm->sg = NULL;
+unmap_sg:
+	dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+			   dir, DMA_ATTR_SKIP_CPU_SYNC);
+	return ret;
+}
+
 static int
 kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 			  struct kfd_mem_attachment *attachment)
@@ -521,6 +685,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
 		return kfd_mem_dmamap_userptr(mem, attachment);
 	case KFD_MEM_ATT_DMABUF:
 		return kfd_mem_dmamap_dmabuf(attachment);
+	case KFD_MEM_ATT_SG:
+		return kfd_mem_dmamap_sg_bo(mem, attachment);
 	default:
 		WARN_ON_ONCE(1);
 	}
@@ -561,6 +727,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
 	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs following steps:
+ *   - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ *   - Free SG Table that is used to encapsulate DMA mapped memory of
+ *     peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *   UNMapping of DOORBELL or MMIO BO on a device having access to its memory
+ *   Eviction of DOOREBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+		       struct kfd_mem_attachment *attachment)
+{
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+	struct amdgpu_device *adev = attachment->adev;
+	struct ttm_tt *ttm = bo->tbo.ttm;
+	enum dma_data_direction dir;
+
+	if (unlikely(!ttm->sg)) {
+		pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+		return;