diff options
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 102 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 10 |
6 files changed, 89 insertions, 65 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1254a43ec96b..26b1f37c316e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) * @inst: xcc's instance number on a multi-XCC setup */ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, - int *wave_cnt, int *vmid, uint32_t inst) + struct kfd_cu_occupancy *queue_cnt, uint32_t inst) { int pipe_idx; int queue_slot; unsigned int reg_val; - + unsigned int wave_cnt; /* * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID * parameters to read out waves in flight. Get VMID if there are * non-zero waves in flight. */ - *vmid = 0xFF; - *wave_cnt = 0; pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + - queue_slot); - *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; - if (*wave_cnt != 0) - *vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) & - CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, + mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); + wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; + if (wave_cnt != 0) { + queue_cnt->wave_cnt += wave_cnt; + queue_cnt->doorbell_off = + (RREG32_SOC15(GC, inst, mmCP_HQD_PQ_DOORBELL_CONTROL) & + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >> + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + } } /** @@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * or more queues running and submitting waves to compute units. * * @adev: Handle of device from which to get number of waves in flight - * @pasid: Identifies the process for which this query call is invoked - * @pasid_wave_cnt: Output parameter updated with number of waves in flight that - * belong to process with given pasid + * @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset + * for comparison later. * @max_waves_per_cu: Output parameter updated with maximum number of waves * possible per Compute Unit * @inst: xcc's instance number on a multi-XCC setup @@ -1011,30 +1012,24 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * number of waves that are in flight for the queue at specified index. The * index ranges from 0 to 7. * - * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID - * of the wave(s). + * If non-zero waves are in flight, store the corresponding doorbell offset + * of the queue, along with the wave count. * - * Determine if VMID from above step maps to pasid provided as parameter. If - * it matches agrregate the wave count. That the VMID will not match pasid is - * a normal condition i.e. a device is expected to support multiple queues - * from multiple proceses. + * Determine if the queue belongs to the process by comparing the doorbell + * offset against the process's queues. If it matches, aggregate the wave + * count for the process. * * Reading registers referenced above involves programming GRBM appropriately */ -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst) +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst) { int qidx; - int vmid; int se_idx; - int sh_idx; int se_cnt; - int sh_cnt; - int wave_cnt; int queue_map; - int pasid_tmp; int max_queue_cnt; - int vmid_wave_cnt = 0; DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); lock_spi_csq_mutexes(adev); @@ -1048,42 +1043,30 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, AMDGPU_MAX_QUEUES); max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - sh_cnt = adev->gfx.config.max_sh_per_se; se_cnt = adev->gfx.config.max_shader_engines; for (se_idx = 0; se_idx < se_cnt; se_idx++) { - for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { + amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst); + queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); + + /* + * Assumption: queue map encodes following schema: four + * pipes per each micro-engine, with each pipe mapping + * eight queues. This schema is true for GFX9 devices + * and must be verified for newer device families + */ + for (qidx = 0; qidx < max_queue_cnt; qidx++) { + /* Skip qeueus that are not associated with + * compute functions + */ + if (!test_bit(qidx, cp_queue_bitmap)) + continue; - amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst); - queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); + if (!(queue_map & (1 << qidx))) + continue; - /* - * Assumption: queue map encodes following schema: four - * pipes per each micro-engine, with each pipe mapping - * eight queues. This schema is true for GFX9 devices - * and must be verified for newer device families - */ - for (qidx = 0; qidx < max_queue_cnt; qidx++) { - - /* Skip qeueus that are not associated with - * compute functions - */ - if (!test_bit(qidx, cp_queue_bitmap)) - continue; - - if (!(queue_map & (1 << qidx))) - continue; - - /* Get number of waves in flight and aggregate them */ - get_wave_count(adev, qidx, &wave_cnt, &vmid, - inst); - if (wave_cnt != 0) { - pasid_tmp = - RREG32(SOC15_REG_OFFSET(OSSSYS, inst, - mmIH_VMID_0_LUT) + vmid); - if (pasid_tmp == pasid) - vmid_wave_cnt += wave_cnt; - } - } + /* Get number of waves in flight and aggregate them */ + get_wave_count(adev, qidx, &cu_occupancy[qidx], + inst); } } @@ -1092,7 +1075,6 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ - *pasid_wave_cnt = vmid_wave_cnt; *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * adev->gfx.cu_info.max_waves_per_simd; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 988c50ac3be0..b6a91a552aa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); -void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, - int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst); +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 71b465f8d83e..29578550b478 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -3540,6 +3540,26 @@ int debug_refresh_runlist(struct device_queue_manager *dqm) return debug_map_and_unlock(dqm); } +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off) +{ + struct queue *q; + bool r = false; + + dqm_lock(dqm); + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.doorbell_off == doorbell_off) { + r = true; + goto out; + } + } + +out: + dqm_unlock(dqm); + return r; +} #if defined(CONFIG_DEBUG_FS) static void seq_reg_dump(struct seq_file *m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 08b40826ad1e..80be2036abea 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -324,6 +324,9 @@ void set_queue_snapshot_entry(struct queue *q, int debug_lock_and_unmap(struct device_queue_manager *dqm); int debug_map_and_unlock(struct device_queue_manager *dqm); int debug_refresh_runlist(struct device_queue_manager *dqm); +bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + int doorbell_off); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a902950cc060..d73841268c9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -270,6 +270,10 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) struct kfd_node *dev = NULL; struct kfd_process *proc = NULL; struct kfd_process_device *pdd = NULL; + int i; + struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES]; + + memset(cu_occupancy, 0x0, sizeof(cu_occupancy)); pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); dev = pdd->dev; @@ -287,9 +291,17 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) /* Collect wave count from device if it supports */ wave_cnt = 0; max_waves_per_cu = 0; - dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, + + dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, &max_waves_per_cu, 0); + for (i = 0; i < AMDGPU_MAX_QUEUES; i++) { + if (cu_occupancy[i].wave_cnt != 0 && + kfd_dqm_is_queue_in_process(dev->dqm, &pdd->qpd, + cu_occupancy[i].doorbell_off)) + wave_cnt += cu_occupancy[i].wave_cnt; + } + /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 7744ca3ef4b1..e3e635a31b8a 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -71,6 +71,11 @@ enum kgd_memory_pool { KGD_POOL_FRAMEBUFFER = 3, }; +struct kfd_cu_occupancy { + u32 wave_cnt; + u32 doorbell_off; +}; + /** * enum kfd_sched_policy * @@ -313,8 +318,9 @@ struct kfd2kgd_calls { uint32_t grace_period, uint32_t *reg_offset, uint32_t *reg_data); - void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, - int *wave_cnt, int *max_waves_per_cu, uint32_t inst); + void (*get_cu_occupancy)(struct amdgpu_device *adev, + struct kfd_cu_occupancy *cu_occupancy, + int *max_waves_per_cu, uint32_t inst); void (*program_trap_handler_settings)(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst); |