diff options
| author | Jonathan Kim <Jonathan.Kim@amd.com> | 2024-06-25 11:22:50 -0400 |
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2024-08-06 10:43:18 -0400 |
| commit | ee0a469cf9175aeb6131c0476c4a4a8eb5997dfa (patch) | |
| tree | 54532ea2f59cccefc7d0c99fcf5efe4776c75361 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | |
| parent | e89d2fec4cde967445e16e02e406481bac380cc4 (diff) | |
| download | linux-ee0a469cf9175aeb6131c0476c4a4a8eb5997dfa.tar.gz linux-ee0a469cf9175aeb6131c0476c4a4a8eb5997dfa.tar.bz2 linux-ee0a469cf9175aeb6131c0476c4a4a8eb5997dfa.zip | |
drm/amdkfd: support per-queue reset on gfx9
Support per-queue reset for GFX9. The recommendation is for the driver
to reset the targeted HW queue via an SPI MMIO register write.
Since this requires pipe and HW queue info, and MEC FW is limited to
doorbell reports of hung queues after an unmap failure, first scan the HW
queue slots defined by SET_RESOURCES to identify the user queue
candidates to reset.
Only signal reset events to processes that have had a queue reset.
If queue reset fails, fall back to GPU reset.
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 5a35a8ca8922..32f28c12077b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1144,6 +1144,105 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
 	kgd_gfx_v9_unlock_srbm(adev, inst);
 }
 
+/*
+ * Return the queue address programmed in CP_HQD_PQ_BASE/CP_HQD_PQ_BASE_HI of
+ * the currently selected HQD, or 0 when CP_HQD_ACTIVE or CP_HQD_PQ_BASE_HI
+ * reads back as zero.  The caller must have selected the HQD with
+ * kgd_gfx_v9_acquire_queue() beforehand.
+ */
+static uint64_t kgd_gfx_v9_hqd_read_pq_addr(struct amdgpu_device *adev,
+					    uint32_t inst)
+{
+	uint32_t low, high;
+
+	if (!RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE))
+		return 0;
+
+	low = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE);
+	high = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_BASE_HI);
+
+	/* only concerned with user queues. */
+	if (!high)
+		return 0;
+
+	/* widen to 64 bits before shifting so the high dword is kept */
+	return (((uint64_t)high << 32) | low) << 8;
+}
+
+/*
+ * Select HQD slot (pipe_id, queue_id) on XCC instance inst and return the
+ * queue address it holds; 0 means the slot is inactive or not a user queue.
+ */
+uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+				    uint32_t pipe_id, uint32_t queue_id,
+				    uint32_t inst)
+{
+	uint64_t queue_addr;
+
+	kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+	amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+	queue_addr = kgd_gfx_v9_hqd_read_pq_addr(adev, inst);
+
+	amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+	kgd_gfx_v9_release_queue(adev, inst);
+
+	return queue_addr;
+}
+
+/*
+ * Reset HQD slot (pipe_id, queue_id) on XCC instance inst by writing
+ * SPI_COMPUTE_QUEUE_RESET, then poll CP_HQD_ACTIVE for up to utimeout
+ * milliseconds.  Returns the queue address on success, or 0 if the slot was
+ * skipped (inactive or not a user queue) or was still active at the timeout.
+ */
+uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
+			      uint32_t pipe_id, uint32_t queue_id,
+			      uint32_t inst, unsigned int utimeout)
+{
+	unsigned long end_jiffies;
+	uint64_t queue_addr;
+	uint32_t temp;
+
+	kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
+	amdgpu_gfx_rlc_enter_safe_mode(adev, inst);
+
+	queue_addr = kgd_gfx_v9_hqd_read_pq_addr(adev, inst);
+	if (!queue_addr)
+		goto unlock_out;
+
+	pr_debug("Attempting queue reset on XCC %i pipe id %i queue id %i\n",
+		 inst, pipe_id, queue_id);
+
+	/* assume previous dequeue request issued will take effect after reset */
+	WREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_COMPUTE_QUEUE_RESET, 0x1);
+
+	/* msecs_to_jiffies() rounds up, so a short timeout never becomes 0 */
+	end_jiffies = jiffies + msecs_to_jiffies(utimeout);
+	while (true) {
+		temp = RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE);
+
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+			break;
+
+		if (time_after(jiffies, end_jiffies)) {
+			queue_addr = 0;
+			break;
+		}
+
+		usleep_range(500, 1000);
+	}
+
+	pr_debug("queue reset on XCC %i pipe id %i queue id %i %s\n",
+		 inst, pipe_id, queue_id, queue_addr ? "succeeded!" : "failed!");
+
+unlock_out:
+	amdgpu_gfx_rlc_exit_safe_mode(adev, inst);
+	kgd_gfx_v9_release_queue(adev, inst);
+
+	return queue_addr;
+}
+
 const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -1172,4 +1271,6 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
+	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+	.hqd_reset = kgd_gfx_v9_hqd_reset,
 };
