Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h              |  14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           |   2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c      |  10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c       | 274
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c      |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c        |  43
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c      |  49
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c          |   3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c          |  14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c       |   4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c  |  62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c   |  18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c        |  56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h        |  40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c         |   5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h         |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c         |   5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          |   2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h          |  11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c         |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c     |  58
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c         |  27
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h         |   3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atom.c                |   4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c           |   2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c           |   2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c            |   1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c            |   2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c           |   6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c            |  14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c            |  19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c            |  19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c            |  11
33 files changed, 420 insertions(+), 378 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 566303c9942f..8685784f24a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -815,6 +815,8 @@ struct ip_discovery_top;
#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
#define AMDGPU_PRODUCT_NAME_LEN 64
+struct amdgpu_reset_domain;
+
struct amdgpu_device {
struct device *dev;
struct pci_dev *pdev;
@@ -1050,9 +1052,7 @@ struct amdgpu_device {
bool in_s4;
bool in_s0ix;
- atomic_t in_gpu_reset;
enum pp_mp1_state mp1_state;
- struct rw_semaphore reset_sem;
struct amdgpu_doorbell_index doorbell_index;
struct mutex notifier_lock;
@@ -1100,6 +1100,8 @@ struct amdgpu_device {
struct list_head ras_list;
struct ip_discovery_top *ip_top;
+
+ struct amdgpu_reset_domain *reset_domain;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1293,6 +1295,8 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job* job);
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
+ struct amdgpu_job *job);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
@@ -1479,8 +1483,6 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
return adev->gmc.tmz_enabled;
}
-static inline int amdgpu_in_reset(struct amdgpu_device *adev)
-{
- return atomic_read(&adev->in_gpu_reset);
-}
+int amdgpu_in_reset(struct amdgpu_device *adev);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 10b9e99c8941..e762e45b7b85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -312,7 +312,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
}
total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
- used_vram = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr);
+ used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
spin_lock(&adev->mm_stats.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index db8a8710333e..ae0c83237608 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -37,6 +37,8 @@
#include "amdgpu_fw_attestation.h"
#include "amdgpu_umr.h"
+#include "amdgpu_reset.h"
+
#if defined(CONFIG_DEBUG_FS)
/**
@@ -1284,7 +1286,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
}
/* Avoid accidentally unparking the sched thread during GPU reset */
- r = down_write_killable(&adev->reset_sem);
+ r = down_write_killable(&adev->reset_domain->sem);
if (r)
return r;
@@ -1313,7 +1315,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
kthread_unpark(ring->sched.thread);
}
- up_write(&adev->reset_sem);
+ up_write(&adev->reset_domain->sem);
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
@@ -1522,7 +1524,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
return -ENOMEM;
/* Avoid accidentally unparking the sched thread during GPU reset */
- r = down_read_killable(&adev->reset_sem);
+ r = down_read_killable(&adev->reset_domain->sem);
if (r)
goto pro_end;
@@ -1565,7 +1567,7 @@ failure:
/* restart the scheduler */
kthread_unpark(ring->sched.thread);
- up_read(&adev->reset_sem);
+ up_read(&adev->reset_domain->sem);
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 0cfccea722f8..ca8e76771ea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -426,10 +426,10 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
* the lock.
*/
if (in_task()) {
- if (down_read_trylock(&adev->reset_sem))
- up_read(&adev->reset_sem);
+ if (down_read_trylock(&adev->reset_domain->sem))
+ up_read(&adev->reset_domain->sem);
else
- lockdep_assert_held(&adev->reset_sem);
+ lockdep_assert_held(&adev->reset_domain->sem);
}
#endif
return false;
@@ -455,9 +455,9 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_sem)) {
+ down_read_trylock(&adev->reset_domain->sem)) {
ret = amdgpu_kiq_rreg(adev, reg);
- up_read(&adev->reset_sem);
+ up_read(&adev->reset_domain->sem);
} else {
ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -540,9 +540,9 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
if ((reg * 4) < adev->rmmio_size) {
if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
- down_read_trylock(&adev->reset_sem)) {
+ down_read_trylock(&adev->reset_domain->sem)) {
amdgpu_kiq_wreg(adev, reg, v);
- up_read(&adev->reset_sem);
+ up_read(&adev->reset_domain->sem);
} else {
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
}
@@ -2331,6 +2331,49 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
return r;
}
+static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
+{
+ long timeout;
+ int r, i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ /* No need to setup the GPU scheduler for rings that don't need it */
+ if (!ring || ring->no_scheduler)
+ continue;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+ break;
+ default:
+ timeout = adev->video_timeout;
+ break;
+ }
+
+ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+ ring->num_hw_submission, amdgpu_job_hang_limit,
+ timeout, adev->reset_domain->wq,
+ ring->sched_score, ring->name,
+ adev->dev);
+ if (r) {
+ DRM_ERROR("Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -2442,8 +2485,28 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r)
goto init_failed;
- if (adev->gmc.xgmi.num_physical_nodes > 1)
- amdgpu_xgmi_add_device(adev);
+ /**
+ * In case of XGMI grab extra reference for reset domain for this device
+ */
+ if (adev->gmc.xgmi.num_physical_nodes > 1) {
+ if (amdgpu_xgmi_add_device(adev) == 0) {
+ struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
+ if (!hive->reset_domain ||
+ !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
+ r = -ENOENT;
+ goto init_failed;
+ }
+
+ /* Drop the early temporary reset domain we created for device */
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = hive->reset_domain;
+ }
+ }
+
+ r = amdgpu_device_init_schedulers(adev);
+ if (r)
+ goto init_failed;
/* Don't init kfd if whole hive need to be reset during init */
if (!adev->gmc.xgmi.pending_reset)
@@ -3543,8 +3606,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
- atomic_set(&adev->in_gpu_reset, 0);
- init_rwsem(&adev->reset_sem);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
mutex_init(&adev->pm.stable_pstate_ctx_lock);
@@ -3630,6 +3691,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
+ /*
+ * The reset domain needs to be present early, before the XGMI hive is
+ * discovered (if any) and initialized, so that the reset semaphore and
+ * in_gpu_reset flag can be used early on during init.
+ */
+ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
+ if (!adev->reset_domain)
+ return -ENOMEM;
+
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
@@ -4006,6 +4076,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
if (adev->mman.discovery_bin)
amdgpu_discovery_fini(adev);
+ amdgpu_reset_put_reset_domain(adev->reset_domain);
+ adev->reset_domain = NULL;
+
kfree(adev->pci_state);
}
@@ -4817,17 +4890,8 @@ end:
return r;
}
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
- struct amdgpu_hive_info *hive)
+static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
{
- if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
- return false;
-
- if (hive) {
- down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
- } else {
- down_write(&adev->reset_sem);
- }
switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1:
@@ -4840,56 +4904,12 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
adev->mp1_state = PP_MP1_STATE_NONE;
break;
}
-
- return true;
}
-static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
+static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE;
- atomic_set(&adev->in_gpu_reset, 0);
- up_write(&adev->reset_sem);
-}
-
-/*
- * to lockup a list of amdgpu devices in a hive safely, if not a hive
- * with multiple nodes, it will be similar as amdgpu_device_lock_adev.
- *
- * unlock won't require roll back.
- */
-static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
-{
- struct amdgpu_device *tmp_adev = NULL;
-
- if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
- if (!hive) {
- dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes");
- return -ENODEV;
- }
- list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- if (!amdgpu_device_lock_adev(tmp_adev, hive))
- goto roll_back;
- }
- } else if (!amdgpu_device_lock_adev(adev, hive))
- return -EAGAIN;
-
- return 0;
-roll_back:
- if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) {
- /*
- * if the lockup iteration break in the middle of a hive,
- * it may means there may has a race issue,
- * or a hive device locked up independently.
- * we may be in trouble and may not, so will try to roll back
- * the lock and give out a warnning.
- */
- dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock");
- list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- amdgpu_device_unlock_adev(tmp_adev);
- }
- }
- return -EAGAIN;
}
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
@@ -5023,7 +5043,7 @@ retry:
}
/**
- * amdgpu_device_gpu_recover - reset the asic and recover scheduler
+ * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler
*
* @adev: amdgpu_device pointer
* @job: which job trigger hang
@@ -5033,7 +5053,7 @@ retry:
* Returns 0 for success or an error on failure.
*/
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
struct list_head device_list, *device_list_handle = NULL;
@@ -5067,26 +5087,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "GPU %s begin!\n",
need_emergency_restart ? "jobs stop":"reset");
- /*
- * Here we trylock to avoid chain of resets executing from
- * either trigger by jobs on different adevs in XGMI hive or jobs on
- * different schedulers for same device while this TO handler is running.
- * We always reset all schedulers for device and all devices for XGMI
- * hive so that should take care of them too.
- */
if (!amdgpu_sriov_vf(adev))
hive = amdgpu_get_xgmi_hive(adev);
- if (hive) {
- if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
- DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
- job ? job->base.id : -1, hive->hive_id);
- amdgpu_put_xgmi_hive(hive);
- if (job && job->vm)
- drm_sched_increase_karma(&job->base);
- return 0;
- }
+ if (hive)
mutex_lock(&hive->hive_lock);
- }
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
@@ -5095,22 +5099,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
/*
- * lock the device before we try to operate the linked list
- * if didn't get the device lock, don't touch the linked list since
- * others may iterating it.
- */
- r = amdgpu_device_lock_hive_adev(adev, hive);
- if (r) {
- dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress",
- job ? job->base.id : -1);
-
- /* even we skipped this reset, still need to set the job to guilty */
- if (job && job->vm)
- drm_sched_increase_karma(&job->base);
- goto skip_recovery;
- }
-
- /*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
* to put adev in the 1st position.
@@ -5127,8 +5115,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
+ /* We need to lock reset domain only once both for XGMI and single device */
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+ amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
+
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+
+ amdgpu_device_set_mp1_state(tmp_adev);
+
/*
* Try to put the audio codec into suspend state
* before gpu reset started.
@@ -5280,21 +5276,55 @@ skip_sched_resume:
if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev);
- amdgpu_device_unlock_adev(tmp_adev);
+
+ amdgpu_device_unset_mp1_state(tmp_adev);
}
-skip_recovery:
+ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
+ reset_list);
+ amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+
if (hive) {
- atomic_set(&hive->in_reset, 0);
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
}
- if (r && r != -EAGAIN)
+ if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
return r;
}
+struct amdgpu_recover_work_struct {
+ struct work_struct base;
+ struct amdgpu_device *adev;
+ struct amdgpu_job *job;
+ int ret;
+};
+
+static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
+{
+ struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
+
+ recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
+}
+/*
+ * Serialize gpu recover into reset domain single threaded wq
+ */
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
+
+ INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
+
+ if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
+ return -EAGAIN;
+
+ flush_work(&work.base);
+
+ return work.ret;
+}
+
/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
@@ -5482,20 +5512,6 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
return 0;
}
-static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
-
- cancel_delayed_work_sync(&ring->sched.work_tdr);
- }
-}
-
/**
* amdgpu_pci_error_detected - Called when a PCI error is detected.
* @pdev: PCI device struct
@@ -5526,14 +5542,11 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
/* Fatal error, prepare for slot reset */
case pci_channel_io_frozen:
/*
- * Cancel and wait for all TDRs in progress if failing to
- * set adev->in_gpu_reset in amdgpu_device_lock_adev
- *
- * Locking adev->reset_sem will prevent any external access
+ * Locking adev->reset_domain->sem will prevent any external access
* to GPU during PCI error recovery
*/
- while (!amdgpu_device_lock_adev(adev, NULL))
- amdgpu_cancel_all_tdr(adev);
+ amdgpu_device_lock_reset_domain(adev->reset_domain);
+ amdgpu_device_set_mp1_state(adev);
/*
* Block any work scheduling as we do for regular GPU reset
@@ -5640,7 +5653,8 @@ out:
DRM_INFO("PCIe error recovery succeeded\n");
} else {
DRM_ERROR("PCIe error recovery failed, err:%d", r);
- amdgpu_device_unlock_adev(adev);
+ amdgpu_device_unset_mp1_state(adev);
+ amdgpu_device_unlock_reset_domain(adev->reset_domain);
}
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
@@ -5677,7 +5691,8 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
drm_sched_start(&ring->sched, true);
}
- amdgpu_device_unlock_adev(adev);
+ amdgpu_device_unset_mp1_state(adev);
+ amdgpu_device_unlock_reset_domain(adev->reset_domain);
}
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
@@ -5754,6 +5769,11 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
amdgpu_asic_invalidate_hdp(adev, ring);
}
+int amdgpu_in_reset(struct amdgpu_device *adev)
+{
+ return atomic_read(&adev->reset_domain->in_gpu_reset);
+ }
+
/**
* amdgpu_device_halt() - bring hardware to some kind of halt state
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index ec4c9ef5f795..9e5fc4cdb8ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -961,7 +961,7 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
int ret;
unsigned int i, block_width, block_height, block_size_log2;
- if (!rfb->base.dev->mode_config.allow_fb_modifiers)
+ if (rfb->base.dev->mode_config.fb_modifiers_not_supported)
return 0;
for (i = 0; i < format_info->num_planes; ++i) {
@@ -1148,7 +1148,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
if (ret)
return ret;
- if (!dev->mode_config.allow_fb_modifiers) {
+ if (dev->mode_config.fb_modifiers_not_supported) {
drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI,
"GFX9+ requires FB check based on format modifier\n");
ret = check_tiling_flags_gfx6(rfb);
@@ -1156,7 +1156,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev,
return ret;
}
- if (dev->mode_config.allow_fb_modifiers &&
+ if (!dev->mode_config.fb_modifiers_not_supported &&
!(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) {
ret = convert_tiling_flags_to_modifier(rfb);
if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 45977a72b5dd..5d13ed376ab4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -446,24 +446,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
- * @num_hw_submission: number of entries on the hardware queue
- * @sched_score: optional score atomic shared with other schedulers
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
- unsigned num_hw_submission,
- atomic_t *sched_score)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- long timeout;
- int r;
if (!adev)
return -EINVAL;
- if (!is_power_of_2(num_hw_submission))
+ if (!is_power_of_2(ring->num_hw_submission))
return -EINVAL;
ring->fence_drv.cpu_addr = NULL;
@@ -474,41 +468,14 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);
- ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
+ ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock);
- ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
+ ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
GFP_KERNEL);
+
if (!ring->fence_drv.fences)
return -ENOMEM;
- /* No need to setup the GPU scheduler for rings that don't need it */
- if (ring->no_scheduler)
- return 0;
-
- switch (ring->funcs->type) {
- case AMDGPU_RING_TYPE_GFX:
- timeout = adev->gfx_timeout;
- break;
- case AMDGPU_RING_TYPE_COMPUTE:
- timeout = adev->compute_timeout;
- break;
- case AMDGPU_RING_TYPE_SDMA:
- timeout = adev->sdma_timeout;
- break;
- default:
- timeout = adev->video_timeout;
- break;
- }
-
- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
- num_hw_submission, amdgpu_job_hang_limit,
- timeout, NULL, sched_score, ring->name);
- if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
- return r;
- }
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 899a47011a67..dd78402e3cb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -60,7 +60,7 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
struct ttm_resource_manager *man;
man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
- return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE);
+ return sysfs_emit(buf, "%llu\n", man->size);
}
/**
@@ -77,8 +77,9 @@ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
+ struct ttm_resource_manager *man = &adev->mman.gtt_mgr.manager;
- return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr));
+ return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man));
}
static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
@@ -130,20 +131,17 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
struct amdgpu_gtt_node *node;
int r;
- if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
- atomic64_add_return(num_pages, &mgr->used) > man->size) {
- atomic64_sub(num_pages, &mgr->used);
- return -ENOSPC;
- }
-
node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
- if (!node) {
- r = -ENOMEM;
- goto err_out;
- }
+ if (!node)
+ return -ENOMEM;
node->tbo = tbo;
ttm_resource_init(tbo, place, &node->base.base);
+ if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
+ ttm_resource_manager_usage(man) > man->size) {
+ r = -ENOSPC;
+ goto err_free;
+ }
if (place->lpfn) {
spin_lock(&mgr->lock);
@@ -169,11 +167,6 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
err_free:
ttm_resource_fini(man, &node->base.base);
kfree(node);
-
-err_out:
- if (!(place->flags & TTM_PL_FLAG_TEMPORARY))
- atomic64_sub(num_pages, &mgr->used);
-
return r;
}
@@ -196,26 +189,11 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
drm_mm_remove_node(&node->base.mm_nodes[0]);
spin_unlock(&mgr->lock);
- if (!(res->placement & TTM_PL_FLAG_TEMPORARY))
- atomic64_sub(res->num_pages, &mgr->used);
-
ttm_resource_fini(man, res);
kfree(node);