summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c140
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c34
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c272
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c133
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c282
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c315
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c206
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c105
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c73
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c4
30 files changed, 767 insertions, 1111 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 3da27436922c..b14800ac179e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -1053,6 +1053,10 @@ bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return false;
+
+ if (amdgpu_sriov_vf(adev))
+ return false;
+
return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c7b1a2dfde13..9e98f3866edc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -756,11 +756,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo
{
struct ras_err_data err_data = {0, 0, 0, NULL};
- /* CPU MCA will handle page retirement if connected_to_cpu is 1 */
- if (!adev->gmc.xgmi.connected_to_cpu)
- amdgpu_umc_poison_handler(adev, &err_data, reset);
- else if (reset)
- amdgpu_amdkfd_gpu_reset(adev);
+ amdgpu_umc_poison_handler(adev, &err_data, reset);
}
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c04ea7f1e819..ab8f970b2849 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3154,7 +3154,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
@@ -4069,12 +4070,20 @@ static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev = drm_to_adev(dev);
+ int r = 0;
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
adev->in_suspend = true;
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_fini_data_exchange(adev);
+ r = amdgpu_virt_request_full_gpu(adev, false);
+ if (r)
+ return r;
+ }
+
if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
DRM_WARN("smart shift update failed\n");
@@ -4098,6 +4107,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_device_ip_suspend_phase2(adev);
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, false);
+
return 0;
}
@@ -4116,6 +4128,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
struct amdgpu_device *adev = drm_to_adev(dev);
int r = 0;
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_request_full_gpu(adev, true);
+ if (r)
+ return r;
+ }
+
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@@ -4130,6 +4148,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
}
r = amdgpu_device_ip_resume(adev);
+
+ /* no matter what r is, always need to properly release full GPU */
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_init_data_exchange(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
+ }
+
if (r) {
dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
return r;
@@ -5576,9 +5601,9 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
resource_size_t aper_limit =
adev->gmc.aper_base + adev->gmc.aper_size - 1;
- bool p2p_access = !adev->gmc.xgmi.connected_to_cpu &&
- !(pci_p2pdma_distance_many(adev->pdev,
- &peer_adev->dev, 1, true) < 0);
+ bool p2p_access =
+ !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 9fa2a5ceb77d..3993e6134914 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -229,7 +229,7 @@ static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, ui
return r;
}
- memcpy((u8 *)binary, (u8 *)fw->data, adev->mman.discovery_tmr_size);
+ memcpy((u8 *)binary, (u8 *)fw->data, fw->size);
release_firmware(fw);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 782cbca37538..7bd8e33b14be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -58,7 +58,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int r;
- if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0)
+ if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 8adeb7469f1e..d0d99ed607dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -400,7 +400,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
/* We are not protected by ring lock when reading the last sequence
* but it's ok to report slightly wrong fence count here.
*/
- amdgpu_fence_process(ring);
emitted = 0x100000000ull;
emitted -= atomic_read(&ring->fence_drv.last_seq);
emitted += READ_ONCE(ring->fence_drv.sync_seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ceb91469958a..9546adc8a76f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -23,6 +23,7 @@
*
*/
+#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
@@ -865,3 +866,142 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
}
return amdgpu_num_kcq;
}
+
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
+ uint32_t ucode_id)
+{
+ const struct gfx_firmware_header_v1_0 *cp_hdr;
+ const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
+ struct amdgpu_firmware_info *info = NULL;
+ const struct firmware *ucode_fw;
+ unsigned int fw_size;
+
+ switch (ucode_id) {
+ case AMDGPU_UCODE_ID_CP_PFP:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ adev->gfx.pfp_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.pfp_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.pfp_fw->data;
+ ucode_fw = adev->gfx.pfp_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ adev->gfx.me_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.me_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.me_fw->data;
+ ucode_fw = adev->gfx.me_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.ce_fw->data;
+ adev->gfx.ce_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.ce_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.ce_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ adev->gfx.mec2_fw_version =
+ le32_to_cpu(cp_hdr->header.ucode_version);
+ adev->gfx.mec2_feature_version =
+ le32_to_cpu(cp_hdr->ucode_feature_version);
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
+ le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)
+ adev->gfx.mec2_fw->data;
+ ucode_fw = adev->gfx.mec2_fw;
+ fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ adev->gfx.mec_fw_version =
+ le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
+ adev->gfx.mec_feature_version =
+ le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
+ break;
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
+ case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
+ cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
+ adev->gfx.mec_fw->data;
+ ucode_fw = adev->gfx.mec_fw;
+ fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
+ break;
+ default:
+ break;
+ }
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[ucode_id];
+ info->ucode_id = ucode_id;
+ info->fw = ucode_fw;
+ adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index b91ab919ee70..832b3807f1d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -426,4 +426,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg);
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev);
+void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index aebc384531ac..34233a74248c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -572,45 +572,15 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
struct amdgpu_gmc *gmc = &adev->gmc;
-
- switch (adev->ip_versions[GC_HWIP][0]) {
- case IP_VERSION(9, 0, 1):
- case IP_VERSION(9, 3, 0):
- case IP_VERSION(9, 4, 0):
- case IP_VERSION(9, 4, 1):
- case IP_VERSION(9, 4, 2):
- case IP_VERSION(10, 3, 3):
- case IP_VERSION(10, 3, 4):
- case IP_VERSION(10, 3, 5):
- case IP_VERSION(10, 3, 6):
- case IP_VERSION(10, 3, 7):
- /*
- * noretry = 0 will cause kfd page fault tests fail
- * for some ASICs, so set default to 1 for these ASICs.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 1;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- default:
- /* Raven currently has issues with noretry
- * regardless of what we decide for other
- * asics, we should leave raven with
- * noretry = 0 until we root cause the
- * issues.
- *
- * default this to 0 for now, but we may want
- * to change this in the future for certain
- * GPUs as it can increase performance in
- * certain cases.
- */
- if (amdgpu_noretry == -1)
- gmc->noretry = 0;
- else
- gmc->noretry = amdgpu_noretry;
- break;
- }
+ uint32_t gc_ver = adev->ip_versions[GC_HWIP][0];
+ bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
+ gc_ver == IP_VERSION(9, 3, 0) ||
+ gc_ver == IP_VERSION(9, 4, 0) ||
+ gc_ver == IP_VERSION(9, 4, 1) ||
+ gc_ver == IP_VERSION(9, 4, 2) ||
+ gc_ver >= IP_VERSION(10, 3, 0));
+
+ gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
}
void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 7b46f6bf4187..ad980f4b66e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -222,6 +222,8 @@ struct mes_add_queue_input {
uint64_t tba_addr;
uint64_t tma_addr;
uint32_t is_kfd_process;
+ uint32_t is_aql_queue;
+ uint32_t queue_size;
};
struct mes_remove_queue_input {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index e55f106621ef..ccebd8e2a2d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2720,7 +2720,8 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
/* Need disable ras on all IPs here before ip [hw/sw]fini */
- amdgpu_ras_disable_all_features(adev, 0);
+ if (con->features)
+ amdgpu_ras_disable_all_features(adev, 0);
amdgpu_ras_recovery_fini(adev);
return 0;
}
@@ -2833,11 +2834,8 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
struct mce *m = (struct mce *)data;
struct amdgpu_device *adev = NULL;
uint32_t gpu_id = 0;
- uint32_t umc_inst = 0;
- uint32_t ch_inst, channel_index = 0;
+ uint32_t umc_inst = 0, ch_inst = 0;
struct ras_err_data err_data = {0, 0, 0, NULL};
- struct eeprom_table_record err_rec;
- uint64_t retired_page;
/*
* If the error was generated in UMC_V2, which belongs to GPU UMCs,
@@ -2876,21 +2874,22 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
umc_inst, ch_inst);
+ err_data.err_addr =
+ kcalloc(adev->umc.max_ras_err_cnt_per_query,
+ sizeof(struct eeprom_table_record), GFP_KERNEL);
+ if(!err_data.err_addr) {
+ dev_warn(adev->dev, "Failed to alloc memory for "
+ "umc error address record in mca notifier!\n");
+ return NOTIFY_DONE;
+ }
+
/*
* Translate UMC channel address to Physical address
*/
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num
- + ch_inst];
-
- retired_page = ADDR_OF_8KB_BLOCK(m->addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(m->addr);
-
- memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
- err_data.err_addr = &err_rec;
- amdgpu_umc_fill_error_record(&err_data, m->addr,
- retired_page, channel_index, umc_inst);
+ if (adev->umc.ras &&
+ adev->umc.ras->convert_ras_error_address)
+ adev->umc.ras->convert_ras_error_address(adev,
+ &err_data, 0, ch_inst, umc_inst, m->addr);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
@@ -2898,6 +2897,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
amdgpu_ras_save_bad_pages(adev);
}
+ kfree(err_data.err_addr);
return NOTIFY_OK;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index dc43fcb93eac..f5318fedf2f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -113,7 +113,8 @@ static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *dom
static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain)
{
- kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
+ if (domain)
+ kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
}
static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
index 6373bfb47d55..012b72d00e04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
@@ -272,3 +272,275 @@ void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev)
&adev->gfx.rlc.cp_table_gpu_addr,
(void **)&adev->gfx.rlc.cp_table_ptr);
}
+
+static int amdgpu_gfx_rlc_init_microcode_v2_0(struct amdgpu_device *adev)
+{
+ const struct common_firmware_header *common_hdr;
+ const struct rlc_firmware_header_v2_0 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+ unsigned int *tmp;
+ unsigned int i;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+ adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
+ adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
+ adev->gfx.rlc.save_and_restore_offset =
+ le32_to_cpu(rlc_hdr->save_and_restore_offset);
+ adev->gfx.rlc.clear_state_descriptor_offset =
+ le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
+ adev->gfx.rlc.avail_scratch_ram_locations =
+ le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
+ adev->gfx.rlc.reg_restore_list_size =
+ le32_to_cpu(rlc_hdr->reg_restore_list_size);
+ adev->gfx.rlc.reg_list_format_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_start);
+ adev->gfx.rlc.reg_list_format_separate_start =
+ le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
+ adev->gfx.rlc.starting_offsets_start =
+ le32_to_cpu(rlc_hdr->starting_offsets_start);
+ adev->gfx.rlc.reg_list_format_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
+ adev->gfx.rlc.reg_list_size_bytes =
+ le32_to_cpu(rlc_hdr->reg_list_size_bytes);
+ adev->gfx.rlc.register_list_format =
+ kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
+ adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
+ if (!adev->gfx.rlc.register_list_format) {
+ dev_err(adev->dev, "failed to allocate memory for rlc register_list_format\n");
+ return -ENOMEM;
+ }
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
+
+ adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
+
+ tmp = (unsigned int *)((uintptr_t)rlc_hdr +
+ le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
+ for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ if (info->fw) {
+ common_hdr = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(common_hdr->ucode_size_bytes), PAGE_SIZE);
+ }
+ }
+
+ return 0;
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_1(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+ adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+ adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+ adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+ adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+ adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+ adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+ adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+ adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+ adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+ adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+ le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.save_restore_list_cntl_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_gpm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.save_restore_list_srm_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_2(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_2 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.rlc_iram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_iram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_iram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_iram_ucode_offset_bytes);
+ adev->gfx.rlc.rlc_dram_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlc_dram_ucode_size_bytes);
+ adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlc_iram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_IRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_IRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_iram_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlc_dram_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_DRAM];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_DRAM;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_3(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_3 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlcp_ucode_version = le32_to_cpu(rlc_hdr->rlcp_ucode_version);
+ adev->gfx.rlcp_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcp_ucode_feature_version);
+ adev->gfx.rlc.rlcp_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcp_ucode_size_bytes);
+ adev->gfx.rlc.rlcp_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcp_ucode_offset_bytes);
+
+ adev->gfx.rlcv_ucode_version = le32_to_cpu(rlc_hdr->rlcv_ucode_version);
+ adev->gfx.rlcv_ucode_feature_version = le32_to_cpu(rlc_hdr->rlcv_ucode_feature_version);
+ adev->gfx.rlc.rlcv_ucode_size_bytes = le32_to_cpu(rlc_hdr->rlcv_ucode_size_bytes);
+ adev->gfx.rlc.rlcv_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlcv_ucode_offset_bytes);
+
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (adev->gfx.rlc.rlcp_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_P];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_P;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcp_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.rlcv_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_V];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_V;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.rlcv_ucode_size_bytes, PAGE_SIZE);
+ }
+ }
+}
+
+static void amdgpu_gfx_rlc_init_microcode_v2_4(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_4 *rlc_hdr;
+ struct amdgpu_firmware_info *info;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);