38 files changed, 573 insertions, 495 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index de6ba0d4b860..af37f2ef4438 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -35,6 +35,7 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
 #include "kfd_smi_events.h"
 #include <drm/ttm/ttm_tt.h>
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 5c8023cba196..4ebfff6b6c55 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -26,7 +26,7 @@
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 
-static bool cik_event_interrupt_isr(struct kfd_dev *dev,
+static bool cik_event_interrupt_isr(struct kfd_node *dev,
 					const uint32_t *ih_ring_entry,
 					uint32_t *patched_ihre,
 					bool *patched_flag)
@@ -85,7 +85,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 			!amdgpu_no_queue_eviction_on_vm_fault);
 }
 
-static void cik_event_interrupt_wq(struct kfd_dev *dev,
+static void cik_event_interrupt_wq(struct kfd_node *dev,
 					const uint32_t *ih_ring_entry)
 {
 	const struct cik_ih_ring_entry *ihre =
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 81d07ecf666d..eb0b0b38f10e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -293,7 +293,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 			void *data)
 {
 	struct kfd_ioctl_create_queue_args *args = data;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	int err = 0;
 	unsigned int queue_id;
 	struct kfd_process_device *pdd;
@@ -328,7 +328,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	}
 
 	if (!pdd->doorbell_index &&
-	    kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
+	    kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
 		err = -ENOMEM;
 		goto err_alloc_doorbells;
 	}
@@ -336,7 +336,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
 	 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
 	 */
-	if (dev->shared_resources.enable_mes &&
+	if (dev->kfd->shared_resources.enable_mes &&
 			((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
 			>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
 		struct amdgpu_bo_va_mapping *wptr_mapping;
@@ -887,7 +887,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
 {
 	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
 	struct kfd_process_device *pdd;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	long err;
 
 	mutex_lock(&p->mutex);
@@ -1006,18 +1006,18 @@ err_drm_file:
 	return ret;
 }
 
-bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+bool kfd_dev_is_large_bar(struct kfd_node *dev)
 {
 	if (debug_largebar) {
 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
 		return true;
 	}
 
-	if (dev->use_iommu_v2)
+	if (dev->kfd->use_iommu_v2)
 		return false;
 
-	if (dev->local_mem_info.local_mem_size_private == 0 &&
-	    dev->local_mem_info.local_mem_size_public > 0)
+	if (dev->kfd->local_mem_info.local_mem_size_private == 0 &&
+	    dev->kfd->local_mem_info.local_mem_size_public > 0)
 		return true;
 	return false;
 }
@@ -1041,7 +1041,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
 	struct kfd_process_device *pdd;
 	void *mem;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	int idr_handle;
 	long err;
 	uint64_t offset = args->mmap_offset;
@@ -1105,7 +1105,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	}
 
 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
-		if (args->size != kfd_doorbell_process_slice(dev)) {
+		if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
 			err = -EINVAL;
 			goto err_unlock;
 		}
@@ -1231,7 +1231,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
 	struct kfd_process_device *pdd, *peer_pdd;
 	void *mem;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	long err = 0;
 	int i;
 	uint32_t *devices_arr = NULL;
@@ -1405,7 +1405,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 		args->n_success = i+1;
 	}
 
-	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
+	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
 	if (flush_tlb) {
 		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
 				(struct kgd_mem *) mem, true);
@@ -1445,7 +1445,7 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 	int retval;
 	struct kfd_ioctl_alloc_queue_gws_args *args = data;
 	struct queue *q;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 
 	mutex_lock(&p->mutex);
 	q = pqm_get_user_queue(&p->pqm, args->queue_id);
@@ -1482,7 +1482,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 		struct kfd_process *p, void *data)
 {
 	struct kfd_ioctl_get_dmabuf_info_args *args = data;
-	struct kfd_dev *dev = NULL;
+	struct kfd_node *dev = NULL;
 	struct amdgpu_device *dmabuf_adev;
 	void *metadata_buffer = NULL;
 	uint32_t flags;
@@ -1596,7 +1596,7 @@ static int kfd_ioctl_export_dmabuf(struct file *filep,
 	struct kfd_ioctl_export_dmabuf_args *args = data;
 	struct kfd_process_device *pdd;
 	struct dma_buf *dmabuf;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	void *mem;
 	int ret = 0;
@@ -2178,7 +2178,7 @@ static int criu_restore_devices(struct kfd_process *p,
 	}
 
 	for (i = 0; i < args->num_devices; i++) {
-		struct kfd_dev *dev;
+		struct kfd_node *dev;
 		struct kfd_process_device *pdd;
 		struct file *drm_file;
@@ -2240,7 +2240,7 @@ static int criu_restore_devices(struct kfd_process *p,
 		}
 
 		if (!pdd->doorbell_index &&
-		    kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
+		    kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
 			ret = -ENOMEM;
 			goto exit;
 		}
@@ -2268,7 +2268,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	u64 offset;
 
 	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
-		if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev))
+		if (bo_bucket->size !=
+				kfd_doorbell_process_slice(pdd->dev->kfd))
 			return -EINVAL;
 
 		offset = kfd_get_process_doorbells(pdd);
@@ -2350,7 +2351,7 @@ static int criu_restore_bo(struct kfd_process *p,
 
 	/* now map these BOs to GPU/s */
 	for (j = 0; j < p->n_pdds; j++) {
-		struct kfd_dev *peer;
+		struct kfd_node *peer;
 		struct kfd_process_device *peer_pdd;
 
 		if (!bo_priv->mapped_gpuids[j])
@@ -2947,7 +2948,7 @@ err_i1:
 	return retcode;
 }
 
-static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
+static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
 		      struct vm_area_struct *vma)
 {
 	phys_addr_t address;
@@ -2981,7 +2982,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct kfd_process *process;
-	struct kfd_dev *dev = NULL;
+	struct kfd_node *dev = NULL;
 	unsigned long mmap_offset;
 	unsigned int gpu_id;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 475e47027354..f5aebba31e88 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1405,7 +1405,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 	return i;
 }
 
-int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
 {
 	int num_of_cache_types = 0;
@@ -1524,7 +1524,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
 	case IP_VERSION(11, 0, 3):
 	case IP_VERSION(11, 0, 4):
 		num_of_cache_types =
-			kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
+			kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
 		break;
 	default:
 		*pcache_info = dummy_cache_info;
@@ -1858,7 +1858,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 }
 
 static int kfd_fill_gpu_memory_affinity(int *avail_size,
-		struct kfd_dev *kdev, uint8_t type, uint64_t size,
+		struct kfd_node *kdev, uint8_t type, uint64_t size,
 		struct crat_subtype_memory *sub_type_hdr,
 		uint32_t proximity_domain,
 		const struct kfd_local_mem_info *local_mem_info)
@@ -1887,7 +1887,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
 }
 
 #ifdef CONFIG_ACPI_NUMA
-static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
 {
 	struct acpi_table_header *table_header = NULL;
 	struct acpi_subtable_header *sub_header = NULL;
@@ -1982,7 +1982,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
 *	Return 0 if successful else return -ve value
 */
 static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
-			struct kfd_dev *kdev,
+			struct kfd_node *kdev,
 			struct crat_subtype_iolink *sub_type_hdr,
 			uint32_t proximity_domain)
 {
@@ -2044,8 +2044,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 }
 
 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
-			struct kfd_dev *kdev,
-			struct kfd_dev *peer_kdev,
+			struct kfd_node *kdev,
+			struct kfd_node *peer_kdev,
 			struct crat_subtype_iolink *sub_type_hdr,
 			uint32_t proximity_domain_from,
 			uint32_t proximity_domain_to)
@@ -2081,7 +2081,7 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
 *	[OUT] actual size of data filled in crat_image
 */
 static int kfd_create_vcrat_image_gpu(void *pcrat_image,
-				      size_t *size, struct kfd_dev *kdev,
+				      size_t *size, struct kfd_node *kdev,
 				      uint32_t proximity_domain)
 {
 	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
@@ -2153,7 +2153,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	/* Check if this node supports IOMMU. During parsing this flag will
 	 * translate to HSA_CAP_ATS_PRESENT
 	 */
-	if (!kfd_iommu_check_device(kdev))
+	if (!kfd_iommu_check_device(kdev->kfd))
 		cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
 
 	crat_table->length += sub_type_hdr->length;
@@ -2164,7 +2164,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	 * report the total FB size (public+private) as a single
 	 * private heap.
 	 */
-	local_mem_info = kdev->local_mem_info;
+	local_mem_info = kdev->kfd->local_mem_info;
 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 			sub_type_hdr->length);
 
@@ -2216,12 +2216,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	 * (from other GPU to this GPU) will be added
 	 * in kfd_parse_subtype_iolink.
 	 */
-	if (kdev->hive_id) {
+	if (kdev->kfd->hive_id) {
 		for (nid = 0; nid < proximity_domain; ++nid) {
 			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
 			if (!peer_dev->gpu)
 				continue;
-			if (peer_dev->gpu->hive_id != kdev->hive_id)
+			if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
 				continue;
 			sub_type_hdr = (typeof(sub_type_hdr))(
 				(char *)sub_type_hdr +
@@ -2255,12 +2255,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
 *			-- this option is not currently implemented.
 *			The assumption is that all AMD APUs will have CRAT
- *	@kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ *	@kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
 *
 *	Return 0 if successful else return -ve value
 */
 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
-				  int flags, struct kfd_dev *kdev,
+				  int flags, struct kfd_node *kdev,
 				  uint32_t proximity_domain)
 {
 	void *pcrat_image = NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 8d1e8ba58dee..3d0e533b93b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -293,7 +293,7 @@ struct crat_subtype_generic {
 
 #pragma pack()
 
-struct kfd_dev;
+struct kfd_node;
 
 /* Static table to describe GPU Cache information */
 struct kfd_gpu_cache_info {
@@ -305,14 +305,14 @@ struct kfd_gpu_cache_info {
 	 */
 	uint32_t num_cu_shared;
 };
-int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info);
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);
 
 int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
 void kfd_destroy_crat_image(void *crat_image);
 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
 			 uint32_t proximity_domain);
 
 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
-				  int flags, struct kfd_dev *kdev,
+				  int flags, struct kfd_node *kdev,
 				  uint32_t proximity_domain);
 
 #endif /* KFD_CRAT_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index ad5a40a685ac..4a5a0a4e00f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -43,7 +43,7 @@ static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data)
 static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
 	const char __user *user_buf, size_t size, loff_t *ppos)
 {
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	char tmp[16];
 	uint32_t gpu_id;
 	int ret = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 1510041a6ee1..23d9a7f77055 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -61,7 +61,7 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
 
 static int kfd_resume_iommu(struct kfd_dev *kfd);
-static int kfd_resume(struct kfd_dev *kfd);
+static int kfd_resume(struct kfd_node *kfd);
 
 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
 {
@@ -441,8 +441,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
 	memset(&kfd->doorbell_available_index, 0,
 		sizeof(kfd->doorbell_available_index));
 
-	atomic_set(&kfd->sram_ecc_flag, 0);
-
 	ida_init(&kfd->doorbell_ida);
 
 	return kfd;
 }
@@ -489,41 +487,106 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
 	}
 }
 
-static int kfd_gws_init(struct kfd_dev *kfd)
+static int kfd_gws_init(struct kfd_node *node)
 {
 	int ret = 0;
+	struct kfd_dev *kfd = node->kfd;
 
-	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+	if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
 		return 0;
 
-	if (hws_gws_support || (KFD_IS_SOC15(kfd) &&
-		((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
+	if (hws_gws_support || (KFD_IS_SOC15(node) &&
+		((KFD_GC_VERSION(node) == IP_VERSION(9, 0, 1)
 			&& kfd->mec2_fw_version >= 0x81b3) ||
-		(KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0)
+		(KFD_GC_VERSION(node) <= IP_VERSION(9, 4, 0)
 			&& kfd->mec2_fw_version >= 0x1b3) ||
-		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
+		(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 1)
 			&& kfd->mec2_fw_version >= 0x30) ||
-		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
+		(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
 			&& kfd->mec2_fw_version >= 0x28) ||
-		(KFD_GC_VERSION(kfd) >= IP_VERSION(10, 3, 0)
-		&& KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)
+		(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
+		&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
 			&& kfd->mec2_fw_version >= 0x6b))))
-		ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
-				kfd->adev->gds.gws_size, &kfd->gws);
+		ret = amdgpu_amdkfd_alloc_gws(node->adev,
+				node->adev->gds.gws_size, &node->gws);
 
 	return ret;
 }
 
-static void kfd_smi_init(struct kfd_dev *dev)
+static void kfd_smi_init(struct kfd_node *dev)
 {
 	INIT_LIST_HEAD(&dev->smi_clients);
 	spin_lock_init(&dev->smi_lock);
 }
 
+static int kfd_init_node(struct kfd_node *node)
+{
+	int err = -1;
+
+	if (kfd_interrupt_init(node)) {
+		dev_err(kfd_device, "Error initializing interrupts\n");
+		goto kfd_interrupt_error;
+	}
+
+	node->dqm = device_queue_manager_init(node);
+	if (!node->dqm) {
+		dev_err(kfd_device, "Error initializing queue manager\n");
+		goto device_queue_manager_error;
+	}
+
+	if (kfd_gws_init(node)) {
+		dev_err(kfd_device, "Could not allocate %d gws\n",
+			node->adev->gds.gws_size);
+		goto gws_error;
+	}
+
+	if (kfd_resume(node))
+		goto kfd_resume_error;
+
+	if (kfd_topology_add_device(node)) {
+		dev_err(kfd_device, "Error adding device to topology\n");
+		goto kfd_topology_add_device_error;
+	}
+
+	kfd_smi_init(node);
+
+	return 0;
+
+kfd_topology_add_device_error:
+kfd_resume_error:
+gws_error:
+	device_queue_manager_uninit(node->dqm);
+device_queue_manager_error:
+	kfd_interrupt_exit(node);
+kfd_interrupt_error:
+	if (node->gws)
+		amdgpu_amdkfd_free_gws(node->adev, node->gws);
+
+	/* Cleanup the node memory here */
+	kfree(node);
+	return err;
+}
+
+static void kfd_cleanup_node(struct kfd_dev *kfd)
+{
+	struct kfd_node *knode = kfd->node;
+
+	device_queue_manager_uninit(knode->dqm);
+	kfd_interrupt_exit(knode);
+	kfd_topology_remove_device(knode);
+	if (knode->gws)
+		amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
+	kfree(knode);
+	kfd->node = NULL;
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
 	unsigned int size, map_process_packet_size;
+	struct kfd_node *node;
+	uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
+	unsigned int max_proc_per_quantum;
 
 	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
 			KGD_ENGINE_MEC1);
@@ -533,10 +596,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			KGD_ENGINE_SDMA1);
 	kfd->shared_resources = *gpu_resources;
 
-	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
-	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
-	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
-			- kfd->vm_info.first_vmid_kfd + 1;
+	first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
+	last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
+	vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
 
 	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
 	 * 32 and 64-bit requests are possible and must be
@@ -557,9 +619,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	/* Verify module parameters regarding mapped process number*/
 	if (hws_max_conc_proc >= 0)
-		kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd);
+		max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd);
 	else
-		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
+		max_proc_per_quantum = vmid_num_kfd;
 
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
@@ -609,26 +671,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	kfd->noretry = kfd->adev->gmc.noretry;
 
-	if (kfd_interrupt_init(kfd)) {
-		dev_err(kfd_device, "Error initializing interrupts\n");
-		goto kfd_interrupt_error;
-	}
-
-	kfd->dqm = device_queue_manager_init(kfd);
-	if (!kfd->dqm) {
-		dev_err(kfd_device, "Error initializing queue manager\n");
-		goto device_queue_manager_error;
-	}
-
-	/* If supported on this device, allocate global GWS that is shared
-	 * by all KFD processes
-	 */
-	if (kfd_gws_init(kfd)) {
-		dev_err(kfd_device, "Could not allocate %d gws\n",
-			kfd->adev->gds.gws_size);
-		goto gws_error;
-	}
-
 	/* If CRAT is broken, won't set iommu enabled */
 	kfd_double_confirm_iommu_support(kfd);
@@ -642,46 +684,54 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	svm_migrate_init(kfd->adev);
 
-	if (kfd_resume_iommu(kfd))
-		goto device_iommu_error;
-
-	if (kfd_resume(kfd))
-		goto kfd_resume_error;
-
-	amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
+	/* Allocate the KFD node */
+	node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL);
+	if (!node) {
+		dev_err(kfd_device, "Error allocating KFD node\n");
+		goto node_alloc_error;
+	}
 
-	if (kfd_topology_add_device(kfd)) {
-		dev_err(kfd_device, "Error adding device to topology\n");
-		goto kfd_topology_add_device_error;
+	node->adev = kfd->adev;
+	node->kfd = kfd;
+	node->kfd2kgd = kfd->kfd2kgd;
+	node->vm_info.vmid_num_kfd = vmid_num_kfd;
+	node->vm_info.first_vmid_kfd = first_vmid_kfd;
+	node->vm_info.last_vmid_kfd = last_vmid_kfd;
+	node->max_proc_per_quantum = max_proc_per_quantum;
+	atomic_set(&node->sram_ecc_flag, 0);
+
+	/* Initialize the KFD node */
+	if (kfd_init_node(node)) {
+		dev_err(kfd_device, "Error initializing KFD node\n");
+		goto node_init_error;
 	}
+	kfd->node = node;
 
-	kfd_smi_init(kfd);
+	if (kfd_resume_iommu(kfd))
+		goto kfd_resume_iommu_error;
+
+	amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);
 
 	kfd->init_complete = true;
 	dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
 		 kfd->adev->pdev->device);
 
 	pr_debug("Starting kfd with the following scheduling policy %d\n",
-		kfd->dqm->sched_policy);
+		node->dqm->sched_policy);
 
 	goto out;
 
-kfd_topology_add_device_error:
-kfd_resume_error:
+kfd_resume_iommu_error:
+	kfd_cleanup_node(kfd);
+node_init_error:
+node_alloc_error:
 device_iommu_error:
-gws_error:
-	device_queue_manager_uninit(kfd->dqm);
-device_queue_manager_error:
-	kfd_interrupt_exit(kfd);
-kfd_interrupt_error:
 	kfd_doorbell_fini(kfd);
 kfd_doorbell_error:
 	kfd_gtt_sa_fini(kfd);
 kfd_gtt_sa_init_error:
 	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
 alloc_gtt_mem_failure:
-	if (kfd->gws)
-		amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
 	dev_err(kfd_device, "device %x:%x NOT added due to errors\n",
 		kfd->adev->pdev->vendor, kfd->adev->pdev->device);
@@ -692,15 +742,11 @@ out:
 void kgd2kfd_device_exit(struct kfd_dev *kfd)
 {
 	if (kfd->init_complete) {
-		device_queue_manager_uninit(kfd->dqm);
-		kfd_interrupt_exit(kfd);
-		kfd_topology_remove_device(kfd);
+		kfd_cleanup_node(kfd);
 		kfd_doorbell_fini(kfd);
 		ida_destroy(&kfd->doorbell_ida);
 		kfd_gtt_sa_fini(kfd);
 		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
-		if (kfd->gws)
-			amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
 	}
 
 	kfree(kfd);
@@ -708,16 +754,18 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 
 int kgd2kfd_pre_reset(struct kfd_dev *kfd)
 {
+	struct kfd_node *node = kfd->node;
+
 	if (!kfd->init_complete)
 		return 0;
 
-	kfd_smi_event_update_gpu_reset(kfd, false);
+	kfd_smi_event_update_gpu_reset(node, false);
 
-	kfd->dqm->ops.pre_reset(kfd->dqm);
+	node->dqm->ops.pre_reset(node->dqm);
 
 	kgd2kfd_suspend(kfd, false);
 
-	kfd_signal_reset_event(kfd);
+	kfd_signal_reset_event(node);
 
 	return 0;
 }
@@ -730,18 +778,19 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
 int kgd2kfd_post_reset(struct kfd_dev *kfd)
 {
 	int ret;
+	struct kfd_node *node = kfd->node;
 
 	if (!kfd->init_complete)
 		return 0;
 
-	ret = kfd_resume(kfd);
+	ret = kfd_resume(node);
 	if (ret)
 		return ret;
 
 	atomic_dec(&kfd_locked);
 
-	atomic_set(&kfd->sram_ecc_flag, 0);
+	atomic_set(&node->sram_ecc_flag, 0);
 
-	kfd_smi_event_update_gpu_reset(kfd, true);
+	kfd_smi_event_update_gpu_reset(node, true);
 
 	return 0;
 }
@@ -753,6 +802,8 @@ bool kfd_is_locked(void)
 
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 {
+	struct kfd_node *node = kfd->node;
+
 	if (!kfd->init_complete)
 		return;
 
@@ -763,18 +814,19 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 		kfd_suspend_all_processes();
 	}
 
-	kfd->dqm->ops.stop(kfd->dqm);
+	node->dqm->ops.stop(node->dqm);
 	kfd_iommu_suspend(kfd);
 }
 
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 {
 	int ret, count;
+	struct kfd_node *node = kfd->node;
 
 	if (!kfd->init_complete)
 		return 0;
 
-	ret = kfd_resume(kfd);
+	ret = kfd_resume(node);
 	if (ret)
 		return ret;
 
@@ -809,15 +861,15 @@ static int kfd_resume_iommu(struct kfd_dev *kfd)
 	return err;
 }
 
-static int kfd_resume(struct kfd_dev *kfd)
+static int kfd_resume(struct kfd_node *no
