diff options
| author | Dave Airlie <airlied@redhat.com> | 2021-10-27 10:38:38 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2021-10-27 10:38:41 +1000 |
| commit | 367fe8dc299c968eabdae890536d55d80ea55e01 (patch) | |
| tree | f5c348ea306b554494fc5b5904086a1d1c72eb55 | |
| parent | 6f2f7c83303d2227f47551423e507d77d9ea01c7 (diff) | |
| parent | 41ad36623fabe7d02c9f89aff077dd4c8ba5d602 (diff) | |
| download | linux-367fe8dc299c968eabdae890536d55d80ea55e01.tar.gz linux-367fe8dc299c968eabdae890536d55d80ea55e01.tar.bz2 linux-367fe8dc299c968eabdae890536d55d80ea55e01.zip | |
Merge tag 'amd-drm-next-5.16-2021-10-22' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.16-2021-10-22:
amdgpu:
- PSP fix for resume
- XGMI fixes
- Interrupt fix in device tear down
- Renoir USB-C DP alt mode fix for resume
- DP 2.0 fixes
- Yellow Carp display fixes
- Misc display fixes
- RAS fixes
- IP Discovery enumeration fixes
- VGH fixes
- SR-IOV fixes
- Revert ChromeOS workaround in display code
- Cyan Skillfish fixes
amdkfd:
- Fix error handling in gpu memory allocation
- Fix build warnings with some configs
- SVM fixes
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211022183112.4574-1-alexander.deucher@amd.com
89 files changed, 1336 insertions, 1540 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index fbfeca76f2af..a6c10e50605e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -902,6 +902,7 @@ F: include/uapi/linux/psp-sev.h AMD DISPLAY CORE M: Harry Wentland <harry.wentland@amd.com> M: Leo Li <sunpeng.li@amd.com> +M: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 8d0748184a14..653726588956 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -73,10 +73,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ - vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ - arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ - nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o \ - beige_goby_reg_init.o yellow_carp_reg_init.o cyan_skillfish_reg_init.o + vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ + nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o # add DF block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 148f6c3343ab..bcfdb63b1d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -307,6 +307,8 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = true; } + amdgpu_ras_set_error_query_ready(adev, true); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 054c1a224def..cdf46bd0d8d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1503,7 +1503,7 @@ allocate_init_user_pages_failed: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: - amdgpu_bo_unref(&bo); + drm_gem_object_put(gobj); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 239e71174855..69e0eccc00fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2398,10 +2398,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.pending_reset) amdgpu_amdkfd_device_init(adev); - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - goto init_failed; - amdgpu_fru_get_product_info(adev); init_failed: @@ -3839,10 +3835,10 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_fbdev_fini(adev); - amdgpu_irq_fini_hw(adev); - amdgpu_device_ip_fini_early(adev); + amdgpu_irq_fini_hw(adev); + ttm_device_clear_dma_mappings(&adev->mman.bdev); amdgpu_gart_dummy_page_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 2bebd2ce6474..208a784475bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -108,6 +108,8 @@ static const char *hw_id_names[HW_ID_MAX] = { [HDP_HWID] = "HDP", [SDMA0_HWID] = "SDMA0", [SDMA1_HWID] = "SDMA1", + [SDMA2_HWID] = "SDMA2", + [SDMA3_HWID] = "SDMA3", [ISP_HWID] = "ISP", [DBGU_IO_HWID] = "DBGU_IO", [DF_HWID] = "DF", @@ -505,6 +507,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) break; } } + /* some IP discovery tables on Navy Flounder don't have this set correctly */ + if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) + adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; if (vcn_harvest_count == adev->vcn.num_vcn_inst) { adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; @@ -736,6 +742,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(1, 0, 1): case IP_VERSION(2, 0, 2): case IP_VERSION(2, 0, 0): + case IP_VERSION(2, 0, 3): case IP_VERSION(2, 1, 0): case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 2): @@ -745,8 +752,6 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 1, 3): amdgpu_device_ip_block_add(adev, &dm_ip_block); break; - case IP_VERSION(2, 0, 3): - break; default: return -EINVAL; } @@ -1120,10 +1125,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; + case IP_VERSION(7, 4, 4): + adev->nbio.funcs = &nbio_v7_4_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): case IP_VERSION(7, 5, 0): @@ -1134,12 +1142,15 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 6b39e6c02dd8..fd04e83031d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -65,7 +65,6 @@ static int psp_securedisplay_terminate(struct psp_context *psp); * * This new sequence is required for * - Arcturus and onwards - * - Navi12 and onwards */ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp) { @@ -77,7 +76,9 @@ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp } switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 11): @@ -1291,6 +1292,29 @@ static int psp_ras_unload(struct psp_context *psp) return psp_ta_unload(psp, &psp->ras_context.context); } +static void psp_ras_ta_check_status(struct psp_context *psp) +{ + struct ta_ras_shared_memory *ras_cmd = + (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + + switch (ras_cmd->ras_status) { + case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP: + dev_warn(psp->adev->dev, + "RAS WARNING: cmd failed due to unsupported ip\n"); + break; + case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ: + dev_warn(psp->adev->dev, + "RAS WARNING: cmd failed due to unsupported error injection\n"); + break; + case TA_RAS_STATUS__SUCCESS: + break; + default: + dev_warn(psp->adev->dev, + "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); + break; + } +} + int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { struct ta_ras_shared_memory *ras_cmd; @@ -1325,10 +1349,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) dev_warn(psp->adev->dev, "RAS internal register access blocked\n"); - if (ras_cmd->ras_status == TA_RAS_STATUS__ERROR_UNSUPPORTED_IP) - dev_warn(psp->adev->dev, "RAS WARNING: cmd failed due to unsupported ip\n"); - else if (ras_cmd->ras_status) - dev_warn(psp->adev->dev, "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); + psp_ras_ta_check_status(psp); } return ret; @@ -2622,6 +2643,12 @@ static int psp_resume(void *handle) goto failed; } + ret = psp_rl_load(adev); + if (ret) { + dev_err(adev->dev, "PSP load RL failed!\n"); + goto failed; + } + if (adev->gmc.xgmi.num_physical_nodes > 1) { ret = psp_xgmi_initialize(psp, false, true); /* Warning the XGMI seesion initialize failure diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index e8875351967e..08133de21fdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -112,7 +112,12 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); #ifdef CONFIG_X86_MCE_AMD -static void amdgpu_register_bad_pages_mca_notifier(void); +static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev); +struct mce_notifier_adev_list { + struct amdgpu_device *devs[MAX_GPU_INSTANCE]; + int num_gpu; +}; +static struct mce_notifier_adev_list mce_adev_list; #endif void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready) @@ -2108,7 +2113,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) - amdgpu_register_bad_pages_mca_notifier(); + amdgpu_register_bad_pages_mca_notifier(adev); #endif return 0; @@ -2605,24 +2610,18 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev) #ifdef CONFIG_X86_MCE_AMD static struct amdgpu_device *find_adev(uint32_t node_id) { - struct amdgpu_gpu_in |
