From 6492e1b07c03397f85bd6dc0e230ea6cd9394635 Mon Sep 17 00:00:00 2001 From: yipechai Date: Tue, 4 Jan 2022 13:35:37 +0800 Subject: drm/amdgpu: Unify ras block interface for each ras block 1. Define unified ops interface for each block. 2. Add ras_block_match function pointer in ops interface, each ras block can customize specail match function to identify itself. 3. Add amdgpu_ras_block_match_default new function. If a ras block doesn't define .ras_block_match, default execute amdgpu_ras_block_match_default to identify this ras block. 4. Define unified basic ras block data for each ras block. 5. Create dedicated amdgpu device ras block link list to manage all of the ras blocks. 6. Add amdgpu_ras_register_ras_block new function interface for each ras block to register itself to ras controlling block. Signed-off-by: yipechai Reviewed-by: Hawking Zhang Reviewed-by: John Clements Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 1c708122d492..f66122fdf477 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -484,6 +484,33 @@ struct ras_debug_if { }; int op; }; + +struct amdgpu_ras_block_object { + /* block name */ + char name[32]; + + enum amdgpu_ras_block block; + + uint32_t sub_block_index; + + /* ras block link */ + struct list_head node; + + int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); + int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); + void (*ras_fini)(struct amdgpu_device *adev); + const struct amdgpu_ras_block_hw_ops *hw_ops; +}; + +struct amdgpu_ras_block_hw_ops { + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); + void (*query_ras_error_count)(struct amdgpu_device *adev,void *ras_error_status); + void (*query_ras_error_status)(struct amdgpu_device *adev); + void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); + void (*reset_ras_error_count)(struct amdgpu_device *adev); + void (*reset_ras_error_status)(struct amdgpu_device *adev); +}; + /* work flow * vbios * 1: ras feature enable (enabled by default) @@ -667,4 +694,5 @@ const char *get_ras_block_str(struct ras_common_if *ras_block); bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev); +int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object* ras_block_obj); #endif -- cgit v1.2.3 From 7cab2124058d2f5f048f435a4631e176dcd1430d Mon Sep 17 00:00:00 2001 From: yipechai Date: Tue, 4 Jan 2022 13:39:15 +0800 Subject: drm/amdgpu: Modify the compilation failed problem when other ras blocks' .h include amdgpu_ras.h Modify the compilation failed problem when other ras blocks' .h include amdgpu_ras.h. v2: squash in forward declaration warning fix (Alex) Signed-off-by: yipechai Reviewed-by: Hawking Zhang Reviewed-by: John Clements Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 34 ++++++++++----------------------- 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f66122fdf477..7a4d82378205 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -26,11 +26,11 @@ #include #include -#include "amdgpu.h" -#include "amdgpu_psp.h" #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" +struct amdgpu_iv_entry; + #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) enum amdgpu_ras_block { @@ -525,19 +525,6 @@ struct amdgpu_ras_block_hw_ops { * 8: feature disable */ -#define amdgpu_ras_get_context(adev) ((adev)->psp.ras_context.ras) -#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = (ras_con)) - -/* check if ras is supported on block, say, sdma, gfx */ -static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, - unsigned int block) -{ - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - - if (block >= AMDGPU_RAS_BLOCK_COUNT) - return 0; - return ras && (adev->ras_enabled & (1 << block)); -} int amdgpu_ras_recovery_init(struct amdgpu_device *adev); @@ -554,15 +541,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev); -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) -{ - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - - if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) - schedule_work(&ras->recovery_work); - return 0; -} - static inline enum ta_ras_block amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { switch (block) { @@ -694,5 +672,13 @@ const char *get_ras_block_str(struct ras_common_if *ras_block); bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev); +int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block); + +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev); + +struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); + +int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras* ras_con); + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object* ras_block_obj); #endif -- cgit v1.2.3 From b6efdb02d23ef615464cd0077c211b40a1faca26 Mon Sep 17 00:00:00 2001 From: yipechai Date: Fri, 14 Jan 2022 10:24:30 +0800 Subject: drm/amdgpu: Fix the code style warnings in amdgpu_ras MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the code style warnings in amdgpu_ras: 1. ERROR: space required before the open parenthesis '('. 2. WARNING: line length of xxx exceeds 100 columns. 3. ERROR: "foo* bar" should be "foo *bar". 4. WARNING: unnecessary whitespace before a quoted newline. 5. WARNING: space prohibited before semicolon. 6. WARNING: suspect code indent for conditional statements. 7. WARNING: braces {} are not necessary for single statement blocks. Signed-off-by: yipechai Reviewed-by: Tao Zhou Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7a4d82378205..a51a281bd91a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -496,7 +496,8 @@ struct amdgpu_ras_block_object { /* ras block link */ struct list_head node; - int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); + int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, + enum amdgpu_ras_block block, uint32_t sub_block_index); int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); void (*ras_fini)(struct amdgpu_device *adev); const struct amdgpu_ras_block_hw_ops *hw_ops; @@ -504,7 +505,7 @@ struct amdgpu_ras_block_object { struct amdgpu_ras_block_hw_ops { int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); - void (*query_ras_error_count)(struct amdgpu_device *adev,void *ras_error_status); + void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*query_ras_error_status)(struct amdgpu_device *adev); void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); void (*reset_ras_error_count)(struct amdgpu_device *adev); @@ -678,7 +679,8 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev); struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); -int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras* ras_con); +int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); -int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object* ras_block_obj); +int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, + struct amdgpu_ras_block_object *ras_block_obj); #endif -- cgit v1.2.3 From d5e8ff5f7b2a41d503914d4896ed3c6b3befe933 Mon Sep 17 00:00:00 2001 From: yipechai Date: Sat, 29 Jan 2022 17:09:08 +0800 Subject: drm/amdgpu: Fixed the defect of soft lock caused by infinite loop 1. The infinite loop case only occurs on multiple cards support ras functions. 2. The explanation of root cause refer to commit 76641cbbf196 ("drm/amdgpu: Add judgement to avoid infinite loop"). 3. Create new node to manage each unique ras instance to guarantee each device .ras_list is completely independent. 4. Fixes: commit 7a6b8ab3231b51 ("drm/amdgpu: Unify ras block interface for each ras block"). 5. The soft locked logs are as follows: [ 262.165690] CPU: 93 PID: 758 Comm: kworker/93:1 Tainted: G OE 5.13.0-27-generic #29~20.04.1-Ubuntu [ 262.165695] Hardware name: Supermicro AS -4124GS-TNR/H12DSG-O-CPU, BIOS T20200717143848 07/17/2020 [ 262.165698] Workqueue: events amdgpu_ras_do_recovery [amdgpu] [ 262.165980] RIP: 0010:amdgpu_ras_get_ras_block+0x86/0xd0 [amdgpu] [ 262.166239] Code: 68 d8 4c 8d 71 d8 48 39 c3 74 54 49 8b 45 38 48 85 c0 74 32 44 89 fa 44 89 e6 4c 89 ef e8 82 e4 9b dc 85 c0 74 3c 49 8b 46 28 <49> 8d 56 28 4d 89 f5 48 83 e8 28 48 39 d3 74 25 49 89 c6 49 8b 45 [ 262.166243] RSP: 0018:ffffac908fa87d80 EFLAGS: 00000202 [ 262.166247] RAX: ffffffffc1394248 RBX: ffff91e4ab8d6e20 RCX: ffffffffc1394248 [ 262.166249] RDX: ffff91e4aa356e20 RSI: 000000000000000e RDI: ffff91e4ab8c0000 [ 262.166252] RBP: ffffac908fa87da8 R08: 0000000000000007 R09: 0000000000000001 [ 262.166254] R10: ffff91e4930b64ec R11: 0000000000000000 R12: 000000000000000e [ 262.166256] R13: ffff91e4aa356df8 R14: ffffffffc1394320 R15: 0000000000000003 [ 262.166258] FS: 0000000000000000(0000) GS:ffff92238fb40000(0000) knlGS:0000000000000000 [ 262.166261] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 262.166264] CR2: 00000001004865d0 CR3: 000000406d796000 CR4: 0000000000350ee0 [ 262.166267] Call Trace: [ 262.166272] amdgpu_ras_do_recovery+0x130/0x290 [amdgpu] [ 262.166529] ? psi_task_switch+0xd2/0x250 [ 262.166537] ? __switch_to+0x11d/0x460 [ 262.166542] ? __switch_to_asm+0x36/0x70 [ 262.166549] process_one_work+0x220/0x3c0 [ 262.166556] worker_thread+0x4d/0x3f0 [ 262.166560] ? process_one_work+0x3c0/0x3c0 [ 262.166563] kthread+0x12b/0x150 [ 262.166568] ? set_kthread_struct+0x40/0x40 [ 262.166571] ret_from_fork+0x22/0x30 Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index a51a281bd91a..a55743b12d57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -493,9 +493,6 @@ struct amdgpu_ras_block_object { uint32_t sub_block_index; - /* ras block link */ - struct list_head node; - int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); -- cgit v1.2.3 From bdb3489cfca16815e9a737359e9e90a4af5d0ff3 Mon Sep 17 00:00:00 2001 From: yipechai Date: Sun, 30 Jan 2022 17:03:32 +0800 Subject: drm/amdgpu: Optimize xxx_ras_late_init/xxx_ras_late_fini for each ras block 1. Define amdgpu_ras_block_late_init to create sysfs nodes and interrupt handles. 2. Define amdgpu_ras_block_late_fini to remove sysfs nodes and interrupt handles. 3. Replace ras block variable members in struct amdgpu_ras_block_object with struct ras_common_if, which can make it easy to associate each ras block instance with each ras block functional interface. 4. Add .ras_cb to struct amdgpu_ras_block_object. 5. Change each ras block to fit for the changement of struct amdgpu_ras_block_object. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index a55743b12d57..8b94b556baf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -486,17 +486,13 @@ struct ras_debug_if { }; struct amdgpu_ras_block_object { - /* block name */ - char name[32]; - - enum amdgpu_ras_block block; - - uint32_t sub_block_index; + struct ras_common_if ras_comm; int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); void (*ras_fini)(struct amdgpu_device *adev); + ras_ih_cb ras_cb; const struct amdgpu_ras_block_hw_ops *hw_ops; }; @@ -605,10 +601,17 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block, struct ras_fs_if *fs_info, struct ras_ih_if *ih_info); + +int amdgpu_ras_block_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block); + void amdgpu_ras_late_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block, struct ras_ih_if *ih_info); +void amdgpu_ras_block_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block); + int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); -- cgit v1.2.3 From 9252d33df597a60416f3718b9b41457657c8540c Mon Sep 17 00:00:00 2001 From: yipechai Date: Wed, 9 Feb 2022 11:05:27 +0800 Subject: drm/amdgpu: Optimize operating sysfs and interrupt function interface in amdgpu_ras.c In order to reduce redundant struct conversion, modify operating sysfs and interrupt function interface parameters. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 8b94b556baf6..ae8741ac526f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -619,7 +619,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, - struct ras_fs_if *head); + struct ras_common_if *head); int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); @@ -636,10 +636,10 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info); int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, - struct ras_ih_if *info); + struct ras_common_if *head); int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, - struct ras_ih_if *info); + struct ras_common_if *head); int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); -- cgit v1.2.3 From 563285c85ecaa1fcecf304dabf87cbeee1ddbc3f Mon Sep 17 00:00:00 2001 From: yipechai Date: Wed, 9 Feb 2022 11:15:29 +0800 Subject: drm/amdgpu: Merge amdgpu_ras_late_init/amdgpu_ras_late_fini to amdgpu_ras_block_late_init/amdgpu_ras_block_late_fini 1. Merge amdgpu_ras_late_init to amdgpu_ras_block_late_init. 2. Remove amdgpu_ras_late_init since no ras block calls amdgpu_ras_late_init. 3. Merge amdgpu_ras_late_fini to amdgpu_ras_block_late_fini. 4. Remove amdgpu_ras_late_fini since no ras block calls amdgpu_ras_late_fini. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index ae8741ac526f..5de567c6a8f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -597,18 +597,10 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { int amdgpu_ras_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); -int amdgpu_ras_late_init(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_fs_if *fs_info, - struct ras_ih_if *ih_info); int amdgpu_ras_block_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -void amdgpu_ras_late_fini(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_ih_if *ih_info); - void amdgpu_ras_block_late_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); -- cgit v1.2.3 From 4e9b1fa5a2757d11a5c40eed2b2b4837dcb2f12e Mon Sep 17 00:00:00 2001 From: yipechai Date: Mon, 14 Feb 2022 14:12:55 +0800 Subject: drm/amdgpu: Modify .ras_late_init function pointer parameter Modify .ras_late_init function pointer parameter so that it can remove redundant intermediate calls in some ras blocks. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 5de567c6a8f7..837d1b79a9cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -490,7 +490,7 @@ struct amdgpu_ras_block_object { int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); - int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); + int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block); void (*ras_fini)(struct amdgpu_device *adev); ras_ih_cb ras_cb; const struct amdgpu_ras_block_hw_ops *hw_ops; -- cgit v1.2.3 From 867e24ca4945249baf34ea07ae6b27ca927210a1 Mon Sep 17 00:00:00 2001 From: yipechai Date: Mon, 14 Feb 2022 14:53:37 +0800 Subject: drm/amdgpu: define amdgpu_ras_late_init to call all ras blocks' .ras_late_init Define amdgpu_ras_late_init to call all ras blocks' .ras_late_init. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 837d1b79a9cb..143a83043d7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -595,6 +595,7 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); +int amdgpu_ras_late_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); -- cgit v1.2.3 From 01d468d9a420152e4a1270992e69a37ea0c98e04 Mon Sep 17 00:00:00 2001 From: yipechai Date: Thu, 17 Feb 2022 14:52:20 +0800 Subject: drm/amdgpu: Modify .ras_fini function pointer parameter Modify .ras_fini function pointer parameter so that we can remove redundant intermediate calls in some ras blocks. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 143a83043d7c..7cddaad90d6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -491,7 +491,7 @@ struct amdgpu_ras_block_object { int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block); - void (*ras_fini)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block); ras_ih_cb ras_cb; const struct amdgpu_ras_block_hw_ops *hw_ops; }; -- cgit v1.2.3 From 69691c823531c36c7283ecaa040e99e9c12ece07 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 3 Mar 2022 17:56:33 +0800 Subject: drm/amdgpu: message smu to update bad channel info It should notice SMU to update bad channel info when detected uncorrectable error in UMC block Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7cddaad90d6d..9314fde81e68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -374,6 +374,9 @@ struct amdgpu_ras { /* record umc error info queried from smu */ struct umc_ecc_info umc_ecc; + + /* Indicates smu whether need update bad channel info */ + bool update_channel_flag; }; struct ras_fs_data { -- cgit v1.2.3