diff options
| author | Dave Airlie <airlied@redhat.com> | 2025-09-02 09:35:11 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2025-09-02 09:35:54 +1000 |
| commit | 14579a6f18506fbb3613d509b8291e3d13c13952 (patch) | |
| tree | f9eb0cab7854e9015f96a79f2a0f12ed75965894 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | |
| parent | ddcc2bb28878b5e7f7aeeb2b510629960b6714a5 (diff) | |
| parent | 585b2f685c56c5095cc22c7202bf74d8e9a73cdd (diff) | |
| download | linux-14579a6f18506fbb3613d509b8291e3d13c13952.tar.gz linux-14579a6f18506fbb3613d509b8291e3d13c13952.tar.bz2 linux-14579a6f18506fbb3613d509b8291e3d13c13952.zip | |
Merge tag 'amd-drm-next-6.18-2025-08-29' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.18-2025-08-29:
amdgpu:
- Replay fixes
- RAS updates
- VCN SRAM load fixes
- EDID read fixes
- eDP ALPM support
- AUX fixes
- Documentation updates
- Rework how PTE flags are generated
- DCE6 fixes
- VCN devcoredump cleanup
- MMHUB client id fixes
- SR-IOV fixes
- VRR fixes
- VCN 5.0.1 RAS support
- Backlight fixes
- UserQ fixes
- Misc code cleanups
- SMU 13.0.12 updates
- Expanded PCIe DPC support
- Expanded VCN reset support
- SMU 13.0.x Updates
- VPE per queue reset support
- Cursor rotation fix
- DSC fixes
- GC 12 MES TLB invalidation update
- Cursor fixes
- Non-DC TMDS clock validation fix
amdkfd:
- debugfs fixes
- Misc code cleanups
- Page migration fixes
- Partition fixes
- SVM fixes
radeon:
- Misc code cleanups
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20250829190848.1921648-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 28 |
1 files changed, 26 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 927d6bff734a..6cf0dfd38be8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -492,8 +492,15 @@ struct ras_ecc_err { struct ras_ecc_log_info { struct mutex lock; struct radix_tree_root de_page_tree; - uint64_t de_queried_count; - uint64_t prev_de_queried_count; + uint64_t de_queried_count; + uint64_t consumption_q_count; +}; + +struct ras_critical_region { + struct list_head node; + struct amdgpu_bo *bo; + uint64_t start; + uint64_t size; }; struct amdgpu_ras { @@ -515,6 +522,7 @@ struct amdgpu_ras { /* gpu recovery */ struct work_struct recovery_work; atomic_t in_recovery; + atomic_t rma_in_recovery; struct amdgpu_device *adev; /* error handler data */ struct ras_err_handler_data *eh_data; @@ -557,6 +565,7 @@ struct amdgpu_ras { struct mutex page_retirement_lock; atomic_t page_retirement_req_cnt; atomic_t poison_creation_count; + atomic_t poison_consumption_count; struct mutex page_rsv_lock; DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); struct ras_ecc_log_info umc_ecc_log; @@ -570,6 +579,17 @@ struct amdgpu_ras { struct ras_event_manager *event_mgr; uint64_t reserved_pages_in_bytes; + + pid_t init_task_pid; + char init_task_comm[TASK_COMM_LEN]; + + int bad_page_num; + + struct list_head critical_region_head; + struct mutex critical_region_lock; + + /* Protect poison injection */ + struct mutex poison_lock; }; struct ras_fs_data { @@ -608,6 +628,7 @@ struct ras_err_handler_data { struct eeprom_table_record *bps; /* the count of entries */ int count; + int count_saved; /* the space can place new entries */ int space_left; }; @@ -973,6 +994,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); +int amdgpu_ras_add_critical_region(struct amdgpu_device *adev, struct amdgpu_bo *bo); +bool 
amdgpu_ras_check_critical_address(struct amdgpu_device *adev, uint64_t addr); + int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, enum amdgpu_ras_block block, uint16_t pasid, pasid_notify pasid_fn, void *data, uint32_t reset); |
