From 62207293479e6c03ef498a70f2914c51f4d31d2c Mon Sep 17 00:00:00 2001 From: Haoxiang Li Date: Fri, 16 May 2025 15:16:55 +0300 Subject: drm/xe/display: Add check for alloc_ordered_workqueue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add check for the return value of alloc_ordered_workqueue() in xe_display_create() to catch potential exception. Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Cc: stable@vger.kernel.org Signed-off-by: Haoxiang Li Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/4ee1b0e5d1626ce1dde2e82af05c2edaed50c3aa.1747397638.git.jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 5b62d63395d5b7d4094e7cd380bccae4b25415cb) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/display/xe_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 68f064f33d4b..9f4ade25787a 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -104,6 +104,8 @@ int xe_display_create(struct xe_device *xe) spin_lock_init(&xe->display.fb_tracking.lock); xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); + if (!xe->display.hotplug.dp_wq) + return -ENOMEM; return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } -- cgit v1.2.3 From 9127a69c7193ad47047ff968a2de9161d5c93d37 Mon Sep 17 00:00:00 2001 From: Karthik Poosa Date: Tue, 17 Jun 2025 17:30:30 +0530 Subject: drm/xe/hwmon: Fix xe_hwmon_power_max_write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent other bits of mailbox power limit from being overwritten with 0. This issue was due to a missing read and modify of current power limit, before setting a requested mailbox power limit, which is added in this patch. v2: - Improve commit message. (Anshuman) v3: - Rebase. - Rephrase commit message. (Riana) - Add read-modify-write variant of xe_hwmon_pcode_write_power_limit() i.e. xe_hwmon_pcode_rmw_power_limit(). (Badal) - Use xe_hwmon_pcode_rmw_power_limit() to set mailbox power limits. - Remove xe_hwmon_pcode_write_power_limit() as all mailbox power limits writes use xe_hwmon_pcode_rmw_power_limit() only. v4: - Use PWR_LIM in place of (PWR_LIM_EN | PWR_LIM_VAL) wherever applicable. (Riana) Fixes: 25a2aa779fc3 ("drm/xe/hwmon: Add support to manage power limits though mailbox") Reviewed-by: Riana Tauro Signed-off-by: Karthik Poosa Reviewed-by: Badal Nilawar Link: https://lore.kernel.org/r/20250617120030.612819-1-karthik.poosa@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 8aa7306631f088881759398972d503757cf0c901) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/regs/xe_mchbar_regs.h | 1 + drivers/gpu/drm/xe/xe_hwmon.c | 34 ++++++++++++++------------------ 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h index 5394a1373a6b..ef2bf984723f 100644 --- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h @@ -40,6 +40,7 @@ #define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0) #define PWR_LIM_VAL REG_GENMASK(14, 0) #define PWR_LIM_EN REG_BIT(15) +#define PWR_LIM REG_GENMASK(15, 0) #define PWR_LIM_TIME REG_GENMASK(23, 17) #define PWR_LIM_TIME_X REG_GENMASK(23, 22) #define PWR_LIM_TIME_Y REG_GENMASK(21, 17) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 74f31639b37f..f008e8049700 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -159,8 +159,8 @@ static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 att return ret; } -static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, - u32 uval) +static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel, + u32 clr, u32 set) { struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); u32 val0, val1; @@ -179,7 +179,7 @@ static int xe_hwmon_pcode_write_power_limit(const struct xe_hwmon *hwmon, u32 at channel, val0, val1, ret); if (attr == PL1_HWMON_ATTR) - val0 = uval; + val0 = (val0 & ~clr) | set; else return -EIO; @@ -339,7 +339,7 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe if (hwmon->xe->info.has_mbx_power_limits) { drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n", PWR_ATTR_TO_STR(attr), channel); - xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, 0); + xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0); xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, ®_val); } else { reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0); @@ -370,10 +370,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channe } if (hwmon->xe->info.has_mbx_power_limits) - ret = xe_hwmon_pcode_write_power_limit(hwmon, attr, channel, reg_val); + ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val); else - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN | PWR_LIM_VAL, - reg_val); + reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; @@ -563,14 +562,11 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a mutex_lock(&hwmon->hwmon_lock); - if (hwmon->xe->info.has_mbx_power_limits) { - ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, (u32 *)&r); - r = (r & ~PWR_LIM_TIME) | rxy; - xe_hwmon_pcode_write_power_limit(hwmon, power_attr, channel, r); - } else { + if (hwmon->xe->info.has_mbx_power_limits) + xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy); + else r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel), PWR_LIM_TIME, rxy); - } mutex_unlock(&hwmon->hwmon_lock); @@ -1138,12 +1134,12 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon) } else { drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n"); /* Write default limits to read from pcode from now on. */ - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_CARD, - hwmon->pl1_on_boot[CHANNEL_CARD]); - xe_hwmon_pcode_write_power_limit(hwmon, PL1_HWMON_ATTR, - CHANNEL_PKG, - hwmon->pl1_on_boot[CHANNEL_PKG]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_CARD]); + xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR, + CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME, + hwmon->pl1_on_boot[CHANNEL_PKG]); hwmon->scl_shift_power = PWR_UNIT; hwmon->scl_shift_energy = ENERGY_UNIT; hwmon->scl_shift_time = TIME_UNIT; -- cgit v1.2.3 From a4b1b51ae132ac199412028a2df7b6c267888190 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 6 Jun 2025 11:45:47 +0100 Subject: drm/xe: Move DSB l2 flush to a more sensible place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flushing l2 is only needed after all data has been written. Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340") Signed-off-by: Maarten Lankhorst Cc: Matthew Auld Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Reviewed-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250606104546.1996818-3-matthew.auld@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 0dd2dd0182bc444a62652e89d08c7f0e4fde15ba) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and discrete cards. */ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } -- cgit v1.2.3 From f16873f42a06b620669d48a4b5c3f888cb3653a1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Jun 2025 11:45:48 +0100 Subject: drm/xe: move DPT l2 flush to a more sensible place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only need the flush for DPT host updates here. Normal GGTT updates don't need special flush. Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340") Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Ville Syrjälä Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250606104546.1996818-4-matthew.auld@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 35db1da40c8cfd7511dc42f342a133601eb45449) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index d918ae1c8061..55259969480b 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -333,8 +336,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: -- cgit v1.2.3 From ad40098da5c3b43114d860a5b5740e7204158534 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 13 Jun 2025 00:09:37 +0200 Subject: drm/xe/guc: Explicitly exit CT safe mode on unwind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During driver probe we might be briefly using CT safe mode, which is based on a delayed work, but usually we are able to stop this once we have IRQ fully operational. However, if we abort the probe quite early then during unwind we might try to destroy the workqueue while there is still a pending delayed work that attempts to restart itself which triggers a WARN. This was recently observed during unsuccessful VF initialization: [ ] xe 0000:00:02.1: probe with driver xe failed with error -62 [ ] ------------[ cut here ]------------ [ ] workqueue: cannot queue safe_mode_worker_func [xe] on wq xe-g2h-wq [ ] WARNING: CPU: 9 PID: 0 at kernel/workqueue.c:2257 __queue_work+0x287/0x710 [ ] RIP: 0010:__queue_work+0x287/0x710 [ ] Call Trace: [ ] delayed_work_timer_fn+0x19/0x30 [ ] call_timer_fn+0xa1/0x2a0 Exit the CT safe mode on unwind to avoid that warning. Fixes: 09b286950f29 ("drm/xe/guc: Allow CTB G2H processing without G2H IRQ") Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250612220937.857-3-michal.wajdeczko@intel.com (cherry picked from commit 2ddbb73ec20b98e70a5200cb85deade22ccea2ec) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_ct.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index d0ac48d8f4f7..bbcbb348256f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -34,6 +34,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -186,14 +191,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) -- cgit v1.2.3 From af2b588abe006bd55ddd358c4c3b87523349c475 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 13 Jun 2025 00:09:36 +0200 Subject: drm/xe: Process deferred GGTT node removals on device unwind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While we are indirectly draining our dedicated workqueue ggtt->wq that we use to complete asynchronous removal of some GGTT nodes, this happends as part of the managed-drm unwinding (ggtt_fini_early), which could be later then manage-device unwinding, where we could already unmap our MMIO/GMS mapping (mmio_fini). This was recently observed during unsuccessful VF initialization: [ ] xe 0000:00:02.1: probe with driver xe failed with error -62 [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747340 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747540 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747240 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747040 tiles_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e746840 mmio_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747f40 xe_bo_pinned_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e746b40 devm_drm_dev_init_release (16 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] drmres release begin [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef81640 __fini_relay (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80d40 guc_ct_fini (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80040 __drmm_mutex_release (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80140 ggtt_fini_early (8 bytes) and this was leading to: [ ] BUG: unable to handle page fault for address: ffffc900058162a0 [ ] #PF: supervisor write access in kernel mode [ ] #PF: error_code(0x0002) - not-present page [ ] Oops: Oops: 0002 [#1] SMP NOPTI [ ] Tainted: [W]=WARN [ ] Workqueue: xe-ggtt-wq ggtt_node_remove_work_func [xe] [ ] RIP: 0010:xe_ggtt_set_pte+0x6d/0x350 [xe] [ ] Call Trace: [ ] [ ] xe_ggtt_clear+0xb0/0x270 [xe] [ ] ggtt_node_remove+0xbb/0x120 [xe] [ ] ggtt_node_remove_work_func+0x30/0x50 [xe] [ ] process_one_work+0x22b/0x6f0 [ ] worker_thread+0x1e8/0x3d Add managed-device action that will explicitly drain the workqueue with all pending node removals prior to releasing MMIO/GSM mapping. Fixes: 919bb54e989c ("drm/xe: Fix missing runtime outer protection for ggtt_remove_node") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250612220937.857-2-michal.wajdeczko@intel.com (cherry picked from commit 89d2835c3680ab1938e22ad81b1c9f8c686bd391) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_ggtt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7062115909f2..2c799958c1e4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -201,6 +201,13 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -257,6 +264,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0)); if (err) -- cgit v1.2.3