summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c40
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c353
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atom.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c16
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c4
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c2
39 files changed, 184 insertions, 469 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3e82a11577d9..08f80ca3b296 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -949,9 +949,9 @@ struct amdgpu_device {
bool in_suspend;
bool in_hibernate;
- atomic_t in_gpu_reset;
+ bool in_gpu_reset;
enum pp_mp1_state mp1_state;
- struct rw_semaphore reset_sem;
+ struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;
struct mutex notifier_lock;
@@ -1266,9 +1266,4 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
return adev->gmc.tmz_enabled;
}
-static inline bool amdgpu_in_reset(struct amdgpu_device *adev)
-{
- return atomic_read(&adev->in_gpu_reset) ? true : false;
-}
-
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 9738dccb1c2c..0effc1d46824 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -244,14 +244,11 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
if (cp_mqd_gfx9)
bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
- if (!down_read_trylock(&adev->reset_sem))
- return -EIO;
-
r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
- goto err;
+ return r;
}
/* map the buffer */
@@ -286,7 +283,6 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
amdgpu_bo_unreserve(bo);
- up_read(&adev->reset_sem);
return 0;
allocate_mem_kmap_bo_failed:
@@ -295,25 +291,19 @@ allocate_mem_pin_bo_failed:
amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
amdgpu_bo_unref(&bo);
-err:
- up_read(&adev->reset_sem);
+
return r;
}
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
- down_read(&adev->reset_sem);
-
amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&(bo));
-
- up_read(&adev->reset_sem);
}
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
@@ -345,14 +335,9 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
- down_read(&adev->reset_sem);
-
amdgpu_bo_unref(&bo);
-
- up_read(&adev->reset_sem);
}
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
@@ -626,15 +611,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
/* This works for NO_HWS. TODO: need to handle without knowing VMID */
job->vmid = vmid;
- if (!down_read_trylock(&adev->reset_sem)) {
- ret = -EIO;
- goto err_ib_sched;
- }
-
ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
- up_read(&adev->reset_sem);
-
if (ret) {
DRM_ERROR("amdgpu: failed to schedule IB.\n");
goto err_ib_sched;
@@ -670,9 +648,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- if (!down_read_trylock(&adev->reset_sem))
- return -EIO;
-
if (adev->family == AMDGPU_FAMILY_AI) {
int i;
@@ -682,8 +657,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
}
- up_read(&adev->reset_sem);
-
return 0;
}
@@ -692,18 +665,11 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
const uint32_t flush_type = 0;
bool all_hub = false;
- int ret = -EIO;
if (adev->family == AMDGPU_FAMILY_AI)
all_hub = true;
- if (down_read_trylock(&adev->reset_sem)) {
- ret = amdgpu_gmc_flush_gpu_tlb_pasid(adev,
- pasid, flush_type, all_hub);
- up_read(&adev->reset_sem);
- }
-
- return ret;
+ return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
}
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index b0dcc800251e..bf927f432506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -542,7 +542,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t temp;
struct v10_compute_mqd *m = get_mqd(mqd);
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EIO;
#if 0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 275f20399373..744366c7ee85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -423,7 +423,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
unsigned long flags, end_jiffies;
int retry;
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 4997189d8b36..feab4cc6e836 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -419,7 +419,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry;
struct vi_mqd *m = get_mqd(mqd);
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index d5d997fe6aa4..e4c274bd35c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -539,7 +539,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t temp;
struct v9_mqd *m = get_mqd(mqd);
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index fcf72f337785..62cb510e2cc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1194,9 +1194,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return -EINVAL;
}
- if (!down_read_trylock(&adev->reset_sem))
- return -EIO;
-
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem) {
ret = -ENOMEM;
@@ -1263,7 +1260,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
- up_read(&adev->reset_sem);
return 0;
allocate_init_user_pages_failed:
@@ -1281,9 +1277,6 @@ err:
sg_free_table(sg);
kfree(sg);
}
-
- up_read(&adev->reset_sem);
-
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a3b150304dae..a512ccbc4dea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1292,8 +1292,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
parser.adev = adev;
parser.filp = filp;
- down_read(&adev->reset_sem);
-
r = amdgpu_cs_parser_init(&parser, data);
if (r) {
DRM_ERROR("Failed to initialize parser %d!\n", r);
@@ -1333,8 +1331,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
- up_read(&adev->reset_sem);
-
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d85d13f7a043..8842c55d4490 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -358,8 +358,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
if (atomic_read(&ctx->guilty))
out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
- down_read(&adev->reset_sem);
-
/*query ue count*/
ras_counter = amdgpu_ras_query_error_count(adev, false);
/*ras counter is monotonic increasing*/
@@ -375,8 +373,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
ctx->ras_counter_ce = ras_counter;
}
- up_read(&adev->reset_sem);
-
mutex_unlock(&mgr->lock);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 0af249a1e35b..35fed75a4397 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -101,14 +101,14 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
file->private_data = adev;
- down_read(&adev->reset_sem);
+ mutex_lock(&adev->lock_reset);
if (adev->autodump.dumping.done) {
reinit_completion(&adev->autodump.dumping);
ret = 0;
} else {
ret = -EBUSY;
}
- up_read(&adev->reset_sem);
+ mutex_unlock(&adev->lock_reset);
return ret;
}
@@ -127,7 +127,7 @@ static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_
poll_wait(file, &adev->autodump.gpu_hang, poll_table);
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return POLLIN | POLLRDNORM | POLLWRNORM;
return 0;
@@ -1242,7 +1242,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
}
/* Avoid accidently unparking the sched thread during GPU reset */
- down_read(&adev->reset_sem);
+ mutex_lock(&adev->lock_reset);
/* hold on the scheduler */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -1269,7 +1269,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
kthread_unpark(ring->sched.thread);
}
- up_read(&adev->reset_sem);
+ mutex_unlock(&adev->lock_reset);
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
@@ -1459,7 +1459,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
return -ENOMEM;
/* Avoid accidently unparking the sched thread during GPU reset */
- down_read(&adev->reset_sem);
+ mutex_lock(&adev->lock_reset);
/* stop the scheduler */
kthread_park(ring->sched.thread);
@@ -1500,7 +1500,7 @@ failure:
/* restart the scheduler */
kthread_unpark(ring->sched.thread);
- up_read(&adev->reset_sem);
+ mutex_unlock(&adev->lock_reset);
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bb7f0c8611f9..415e1a32b98c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1940,7 +1940,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
if (adev->ip_blocks[i].status.hw == true)
break;
- if (amdgpu_in_reset(adev) || adev->in_suspend) {
+ if (adev->in_gpu_reset || adev->in_suspend) {
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -2117,7 +2117,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
AMDGPU_RESET_MAGIC_NUM))
return true;
- if (!amdgpu_in_reset(adev))
+ if (!adev->in_gpu_reset)
return false;
/*
@@ -3053,8 +3053,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
- init_rwsem(&adev->reset_sem);
- atomic_set(&adev->in_gpu_reset, 0);
+ mutex_init(&adev->lock_reset);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
@@ -4082,11 +4081,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (need_full_reset) {
/* post card */
- if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) {
- dev_warn(tmp_adev->dev, "asic atom init failed!");
- r = -EAGAIN;
- goto out;
- }
+ if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
+ DRM_WARN("asic atom init failed!");
if (!r) {
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
@@ -4176,18 +4172,16 @@ end:
return r;
}
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
- if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
- return false;
-
- if (hive) {
- down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
- } else {
- down_write(&adev->reset_sem);
- }
+ if (trylock) {
+ if (!mutex_trylock(&adev->lock_reset))
+ return false;
+ } else
+ mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
+ adev->in_gpu_reset = true;
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -4207,8 +4201,8 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
- atomic_set(&adev->in_gpu_reset, 0);
- up_write(&adev->reset_sem);
+ adev->in_gpu_reset = false;
+ mutex_unlock(&adev->lock_reset);
}
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
@@ -4318,14 +4312,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* We always reset all schedulers for device and all devices for XGMI
* hive so that should take care of them too.
*/
- hive = amdgpu_get_xgmi_hive(adev, false);
- if (hive) {
- if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
- DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
- job ? job->base.id : -1, hive->hive_id);
- return 0;
- }
- mutex_lock(&hive->hive_lock);
+ hive = amdgpu_get_xgmi_hive(adev, true);
+ if (hive && !mutex_trylock(&hive->reset_lock)) {
+ DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+ job ? job->base.id : -1, hive->hive_id);
+ mutex_unlock(&hive->hive_lock);
+ return 0;
}
/*
@@ -4347,11 +4339,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
- if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
+ if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
job ? job->base.id : -1);
- r = 0;
- goto skip_recovery;
+ mutex_unlock(&hive->hive_lock);
+ return 0;
}
/*
@@ -4484,9 +4476,8 @@ skip_sched_resume:
amdgpu_device_unlock_adev(tmp_adev);
}
-skip_recovery:
if (hive) {
- atomic_set(&hive->in_reset, 0);
+ mutex_unlock(&hive->reset_lock);
mutex_unlock(&hive->hive_lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 73cc68ab53d0..7f9e50247413 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -671,8 +671,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = NULL;
}
- down_read(&adev->reset_sem);
-
switch (args->operation) {
case AMDGPU_VA_OP_MAP:
va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
@@ -702,8 +700,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
args->operation);
- up_read(&adev->reset_sem);
-
error_backoff:
ttm_eu_backoff_reservation(&ticket, &list);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8ccd17d02cc6..a819360a4b6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -719,7 +719,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
*
* also don't wait anymore for IRQ context
* */
- if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_read;
might_sleep();
@@ -777,7 +777,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
*
* also don't wait anymore for IRQ context
* */
- if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
goto failed_kiq_write;
might_sleep();
@@ -796,5 +796,5 @@ failed_undo:
amdgpu_ring_undo(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
- dev_warn(adev->dev, "failed to write reg:%x\n", reg);
+ pr_err("failed to write reg:%x\n", reg);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 75d37dfb51aa..937029ad5271 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -220,17 +220,17 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
trace_amdgpu_sched_run_job(job);
- if (down_read_trylock(&ring->adev->reset_sem)) {
+ if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
+ dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
+
+ if (finished->error < 0) {
+ DRM_INFO("Skip scheduling IBs!\n");
+ } else {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
- &fence);
- up_read(&ring->adev->reset_sem);
+ &fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
- } else {
- dma_fence_set_error(finished, -ECANCELED);
- DRM_INFO("Skip scheduling IBs!\n");
}
-
/* if gpu reset, hw fence will be replaced here */
dma_fence_put(job->fence);
job->fence = dma_fence_get(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 58580a48b648..7619f1c3084d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1087,8 +1087,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
if (!fpriv)
return;
- down_read(&adev->reset_sem);
-
pm_runtime_get_sync(dev->dev);
if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
@@ -1127,8 +1125,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
-
- up_read(&adev->reset_sem);
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 1705e328c6fc..65ad174bb976 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -163,7 +163,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
enum amd_pm_state_type pm;
int ret;
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -172,8 +172,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
return ret;
}
- down_read(&adev->reset_sem);
-
if (is_support_sw_smu(adev)) {
if (adev->smu.ppt_funcs->get_current_power_state)
pm = smu_get_current_power_state(&adev->smu);
@@ -185,8 +183,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
pm = adev->pm.dpm.user_state;
}
- up_read(&adev->reset_sem);
-
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
@@ -205,7 +201,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
enum amd_pm_state_type state;
int ret;
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EPERM;
if (strncmp("battery", buf, strlen("battery")) == 0)
@@ -223,8 +219,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
return ret;
}
- down_read(&adev->reset_sem);
-
if (is_support_sw_smu(adev)) {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.user_state = state;
@@ -238,9 +232,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
amdgpu_pm_compute_clocks(adev);
}
-
- up_read(&adev->reset_sem);
-
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
@@ -316,7 +307,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
enum amd_dpm_forced_level level = 0xff;
int ret;
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EPERM;
ret = pm_runtime_get_sync(ddev->dev);
@@ -325,8 +316,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
return ret;
}
- down_read(&adev->reset_sem);
-
if (is_support_sw_smu(adev))
level = smu_get_performance_level(&adev->smu);
else if (adev->powerplay.pp_funcs->get_performance_level)
@@ -334,8 +323,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
else
level = adev->pm.dpm.forced_level;
- up_read(&adev->reset_sem);
-
pm_runtime_mark_last_busy(ddev->dev);
pm_runtime_put_autosuspend(ddev->dev);
@@ -362,7 +349,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
enum amd_dpm_forced_level current_level = 0xff;
int ret = 0;
- if (amdgpu_in_reset(adev))
+ if (adev->in_gpu_reset)
return -EPERM;
if (strncmp("low", buf, strlen("low")) == 0) {
@@ -393,8 +380,6 @@ static ssize_t amdgpu_set_power_