diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
33 files changed, 420 insertions, 378 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 566303c9942f..8685784f24a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -815,6 +815,8 @@ struct ip_discovery_top; #define AMDGPU_RESET_MAGIC_NUM 64 #define AMDGPU_MAX_DF_PERFMONS 4 #define AMDGPU_PRODUCT_NAME_LEN 64 +struct amdgpu_reset_domain; + struct amdgpu_device { struct device *dev; struct pci_dev *pdev; @@ -1050,9 +1052,7 @@ struct amdgpu_device { bool in_s4; bool in_s0ix; - atomic_t in_gpu_reset; enum pp_mp1_state mp1_state; - struct rw_semaphore reset_sem; struct amdgpu_doorbell_index doorbell_index; struct mutex notifier_lock; @@ -1100,6 +1100,8 @@ struct amdgpu_device { struct list_head ras_list; struct ip_discovery_top *ip_top; + + struct amdgpu_reset_domain *reset_domain; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1293,6 +1295,8 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, + struct amdgpu_job *job); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); @@ -1479,8 +1483,6 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) return adev->gmc.tmz_enabled; } -static inline int amdgpu_in_reset(struct amdgpu_device *adev) -{ - return atomic_read(&adev->in_gpu_reset); -} +int amdgpu_in_reset(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 10b9e99c8941..e762e45b7b85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -312,7 +312,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, } total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); - used_vram = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr); + used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; spin_lock(&adev->mm_stats.lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index db8a8710333e..ae0c83237608 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -37,6 +37,8 @@ #include "amdgpu_fw_attestation.h" #include "amdgpu_umr.h" +#include "amdgpu_reset.h" + #if defined(CONFIG_DEBUG_FS) /** @@ -1284,7 +1286,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) } /* Avoid accidently unparking the sched thread during GPU reset */ - r = down_write_killable(&adev->reset_sem); + r = down_write_killable(&adev->reset_domain->sem); if (r) return r; @@ -1313,7 +1315,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) kthread_unpark(ring->sched.thread); } - up_write(&adev->reset_sem); + up_write(&adev->reset_domain->sem); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -1522,7 +1524,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) return -ENOMEM; /* Avoid accidently unparking the sched thread during GPU reset */ - r = down_read_killable(&adev->reset_sem); + r = down_read_killable(&adev->reset_domain->sem); if (r) goto pro_end; @@ -1565,7 +1567,7 @@ failure: /* restart the scheduler */ kthread_unpark(ring->sched.thread); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0cfccea722f8..ca8e76771ea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -426,10 +426,10 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) * the lock. */ if (in_task()) { - if (down_read_trylock(&adev->reset_sem)) - up_read(&adev->reset_sem); + if (down_read_trylock(&adev->reset_domain->sem)) + up_read(&adev->reset_domain->sem); else - lockdep_assert_held(&adev->reset_sem); + lockdep_assert_held(&adev->reset_domain->sem); } #endif return false; @@ -455,9 +455,9 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, if ((reg * 4) < adev->rmmio_size) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_sem)) { + down_read_trylock(&adev->reset_domain->sem)) { ret = amdgpu_kiq_rreg(adev, reg); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); } else { ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); } @@ -540,9 +540,9 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, if ((reg * 4) < adev->rmmio_size) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_sem)) { + down_read_trylock(&adev->reset_domain->sem)) { amdgpu_kiq_wreg(adev, reg, v); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); } @@ -2331,6 +2331,49 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) return r; } +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) +{ + long timeout; + int r, i; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + + /* No need to setup the GPU scheduler for rings that don't need it */ + if (!ring || ring->no_scheduler) + continue; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: + timeout = adev->compute_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; + break; + default: + timeout = adev->video_timeout; + break; + } + + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, + ring->num_hw_submission, amdgpu_job_hang_limit, + timeout, adev->reset_domain->wq, + ring->sched_score, ring->name, + adev->dev); + if (r) { + DRM_ERROR("Failed to create scheduler on ring %s.\n", + ring->name); + return r; + } + } + + return 0; +} + + /** * amdgpu_device_ip_init - run init for hardware IPs * @@ -2442,8 +2485,28 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - if (adev->gmc.xgmi.num_physical_nodes > 1) - amdgpu_xgmi_add_device(adev); + /** + * In case of XGMI grab extra reference for reset domain for this device + */ + if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (amdgpu_xgmi_add_device(adev) == 0) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + + if (!hive->reset_domain || + !amdgpu_reset_get_reset_domain(hive->reset_domain)) { + r = -ENOENT; + goto init_failed; + } + + /* Drop the early temporary reset domain we created for device */ + amdgpu_reset_put_reset_domain(adev->reset_domain); + adev->reset_domain = hive->reset_domain; + } + } + + r = amdgpu_device_init_schedulers(adev); + if (r) + goto init_failed; /* Don't init kfd if whole hive need to be reset during init */ if (!adev->gmc.xgmi.pending_reset) @@ -3543,8 +3606,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); - atomic_set(&adev->in_gpu_reset, 0); - init_rwsem(&adev->reset_sem); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); mutex_init(&adev->pm.stable_pstate_ctx_lock); @@ -3630,6 +3691,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } + /* + * Reset domain needs to be present early, before XGMI hive discovered + * (if any) and intitialized to use reset sem and in_gpu reset flag + * early on during init. + */ + adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE ,"amdgpu-reset-dev"); + if (!adev->reset_domain) + return -ENOMEM; + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) @@ -4006,6 +4076,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) if (adev->mman.discovery_bin) amdgpu_discovery_fini(adev); + amdgpu_reset_put_reset_domain(adev->reset_domain); + adev->reset_domain = NULL; + kfree(adev->pci_state); } @@ -4817,17 +4890,8 @@ end: return r; } -static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, - struct amdgpu_hive_info *hive) +static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) { - if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) - return false; - - if (hive) { - down_write_nest_lock(&adev->reset_sem, &hive->hive_lock); - } else { - down_write(&adev->reset_sem); - } switch (amdgpu_asic_reset_method(adev)) { case AMD_RESET_METHOD_MODE1: @@ -4840,56 +4904,12 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, adev->mp1_state = PP_MP1_STATE_NONE; break; } - - return true; } -static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) +static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev) { amdgpu_vf_error_trans_all(adev); adev->mp1_state = PP_MP1_STATE_NONE; - atomic_set(&adev->in_gpu_reset, 0); - up_write(&adev->reset_sem); -} - -/* - * to lockup a list of amdgpu devices in a hive safely, if not a hive - * with multiple nodes, it will be similar as amdgpu_device_lock_adev. - * - * unlock won't require roll back. - */ -static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) -{ - struct amdgpu_device *tmp_adev = NULL; - - if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { - if (!hive) { - dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes"); - return -ENODEV; - } - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - if (!amdgpu_device_lock_adev(tmp_adev, hive)) - goto roll_back; - } - } else if (!amdgpu_device_lock_adev(adev, hive)) - return -EAGAIN; - - return 0; -roll_back: - if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) { - /* - * if the lockup iteration break in the middle of a hive, - * it may means there may has a race issue, - * or a hive device locked up independently. - * we may be in trouble and may not, so will try to roll back - * the lock and give out a warnning. - */ - dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock"); - list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) { - amdgpu_device_unlock_adev(tmp_adev); - } - } - return -EAGAIN; } static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) @@ -5023,7 +5043,7 @@ retry: } /** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler * * @adev: amdgpu_device pointer * @job: which job trigger hang @@ -5033,7 +5053,7 @@ retry: * Returns 0 for success or an error on failure. */ -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, struct amdgpu_job *job) { struct list_head device_list, *device_list_handle = NULL; @@ -5067,26 +5087,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, dev_info(adev->dev, "GPU %s begin!\n", need_emergency_restart ? "jobs stop":"reset"); - /* - * Here we trylock to avoid chain of resets executing from - * either trigger by jobs on different adevs in XGMI hive or jobs on - * different schedulers for same device while this TO handler is running. - * We always reset all schedulers for device and all devices for XGMI - * hive so that should take care of them too. - */ if (!amdgpu_sriov_vf(adev)) hive = amdgpu_get_xgmi_hive(adev); - if (hive) { - if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) { - DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", - job ? job->base.id : -1, hive->hive_id); - amdgpu_put_xgmi_hive(hive); - if (job && job->vm) - drm_sched_increase_karma(&job->base); - return 0; - } + if (hive) mutex_lock(&hive->hive_lock); - } reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -5095,22 +5099,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); /* - * lock the device before we try to operate the linked list - * if didn't get the device lock, don't touch the linked list since - * others may iterating it. - */ - r = amdgpu_device_lock_hive_adev(adev, hive); - if (r) { - dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress", - job ? job->base.id : -1); - - /* even we skipped this reset, still need to set the job to guilty */ - if (job && job->vm) - drm_sched_increase_karma(&job->base); - goto skip_recovery; - } - - /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list * to put adev in the 1st position. @@ -5127,8 +5115,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + /* We need to lock reset domain only once both for XGMI and single device */ + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); + /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + + amdgpu_device_set_mp1_state(tmp_adev); + /* * Try to put the audio codec into suspend state * before gpu reset started. @@ -5280,21 +5276,55 @@ skip_sched_resume: if (audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); - amdgpu_device_unlock_adev(tmp_adev); + + amdgpu_device_unset_mp1_state(tmp_adev); } -skip_recovery: + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); + if (hive) { - atomic_set(&hive->in_reset, 0); mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); } - if (r && r != -EAGAIN) + if (r) dev_info(adev->dev, "GPU reset end with ret = %d\n", r); return r; } +struct amdgpu_recover_work_struct { + struct work_struct base; + struct amdgpu_device *adev; + struct amdgpu_job *job; + int ret; +}; + +static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work) +{ + struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base); + + recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job); +} +/* + * Serialize gpu recover into reset domain single threaded wq + */ +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + struct amdgpu_recover_work_struct work = {.adev = adev, .job = job}; + + INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work); + + if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base)) + return -EAGAIN; + + flush_work(&work.base); + + return work.ret; +} + /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -5482,20 +5512,6 @@ int amdgpu_device_baco_exit(struct drm_device *dev) return 0; } -static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - - cancel_delayed_work_sync(&ring->sched.work_tdr); - } -} - /** * amdgpu_pci_error_detected - Called when a PCI error is detected. * @pdev: PCI device struct @@ -5526,14 +5542,11 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta /* Fatal error, prepare for slot reset */ case pci_channel_io_frozen: /* - * Cancel and wait for all TDRs in progress if failing to - * set adev->in_gpu_reset in amdgpu_device_lock_adev - * - * Locking adev->reset_sem will prevent any external access + * Locking adev->reset_domain->sem will prevent any external access * to GPU during PCI error recovery */ - while (!amdgpu_device_lock_adev(adev, NULL)) - amdgpu_cancel_all_tdr(adev); + amdgpu_device_lock_reset_domain(adev->reset_domain); + amdgpu_device_set_mp1_state(adev); /* * Block any work scheduling as we do for regular GPU reset @@ -5640,7 +5653,8 @@ out: DRM_INFO("PCIe error recovery succeeded\n"); } else { DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unlock_adev(adev); + amdgpu_device_unset_mp1_state(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); } return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -5677,7 +5691,8 @@ void amdgpu_pci_resume(struct pci_dev *pdev) drm_sched_start(&ring->sched, true); } - amdgpu_device_unlock_adev(adev); + amdgpu_device_unset_mp1_state(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) @@ -5754,6 +5769,11 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, amdgpu_asic_invalidate_hdp(adev, ring); } +int amdgpu_in_reset(struct amdgpu_device *adev) +{ + return atomic_read(&adev->reset_domain->in_gpu_reset); + } + /** * amdgpu_device_halt() - bring hardware to some kind of halt state * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index ec4c9ef5f795..9e5fc4cdb8ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -961,7 +961,7 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) int ret; unsigned int i, block_width, block_height, block_size_log2; - if (!rfb->base.dev->mode_config.allow_fb_modifiers) + if (rfb->base.dev->mode_config.fb_modifiers_not_supported) return 0; for (i = 0; i < format_info->num_planes; ++i) { @@ -1148,7 +1148,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, if (ret) return ret; - if (!dev->mode_config.allow_fb_modifiers) { + if (dev->mode_config.fb_modifiers_not_supported) { drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI, "GFX9+ requires FB check based on format modifier\n"); ret = check_tiling_flags_gfx6(rfb); @@ -1156,7 +1156,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, return ret; } - if (dev->mode_config.allow_fb_modifiers && + if (!dev->mode_config.fb_modifiers_not_supported && !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) { ret = convert_tiling_flags_to_modifier(rfb); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 45977a72b5dd..5d13ed376ab4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -446,24 +446,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * for the requested ring. * * @ring: ring to init the fence driver on - * @num_hw_submission: number of entries on the hardware queue - * @sched_score: optional score atomic shared with other schedulers * * Init the fence driver for the requested ring (all asics). * Helper function for amdgpu_fence_driver_init(). */ -int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, - unsigned num_hw_submission, - atomic_t *sched_score) +int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - long timeout; - int r; if (!adev) return -EINVAL; - if (!is_power_of_2(num_hw_submission)) + if (!is_power_of_2(ring->num_hw_submission)) return -EINVAL; ring->fence_drv.cpu_addr = NULL; @@ -474,41 +468,14 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0); - ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1; + ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1; spin_lock_init(&ring->fence_drv.lock); - ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), + ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *), GFP_KERNEL); + if (!ring->fence_drv.fences) return -ENOMEM; - /* No need to setup the GPU scheduler for rings that don't need it */ - if (ring->no_scheduler) - return 0; - - switch (ring->funcs->type) { - case AMDGPU_RING_TYPE_GFX: - timeout = adev->gfx_timeout; - break; - case AMDGPU_RING_TYPE_COMPUTE: - timeout = adev->compute_timeout; - break; - case AMDGPU_RING_TYPE_SDMA: - timeout = adev->sdma_timeout; - break; - default: - timeout = adev->video_timeout; - break; - } - - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, - num_hw_submission, amdgpu_job_hang_limit, - timeout, NULL, sched_score, ring->name); - if (r) { - DRM_ERROR("Failed to create scheduler on ring %s.\n", - ring->name); - return r; - } - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 899a47011a67..dd78402e3cb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -60,7 +60,7 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, struct ttm_resource_manager *man; man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE); + return sysfs_emit(buf, "%llu\n", man->size); } /** @@ -77,8 +77,9 @@ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); + struct ttm_resource_manager *man = &adev->mman.gtt_mgr.manager; - return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr)); + return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man)); } static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, @@ -130,20 +131,17 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, struct amdgpu_gtt_node *node; int r; - if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && - atomic64_add_return(num_pages, &mgr->used) > man->size) { - atomic64_sub(num_pages, &mgr->used); - return -ENOSPC; - } - node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); - if (!node) { - r = -ENOMEM; - goto err_out; - } + if (!node) + return -ENOMEM; node->tbo = tbo; ttm_resource_init(tbo, place, &node->base.base); + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + ttm_resource_manager_usage(man) > man->size) { + r = -ENOSPC; + goto err_free; + } if (place->lpfn) { spin_lock(&mgr->lock); @@ -169,11 +167,6 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, err_free: ttm_resource_fini(man, &node->base.base); kfree(node); - -err_out: - if (!(place->flags & TTM_PL_FLAG_TEMPORARY)) - atomic64_sub(num_pages, &mgr->used); - return r; } @@ -196,26 +189,11 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, drm_mm_remove_node(&node->base.mm_nodes[0]); spin_unlock(&mgr->lock); - if (!(res->placement & TTM_PL_FLAG_TEMPORARY)) - atomic64_sub(res->num_pages, &mgr->used); - ttm_resource_fini(man, res); kfree(node); |