summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 11:07:56 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 11:07:56 -0800
commit1d36dffa5d887715dacca0f717f4519b7be5e498 (patch)
treea68f7c00dbb3036a67806ed6c6b8cc61c3cff60d /drivers/gpu/drm/i915/i915_gpu_error.c
parent2c85ebc57b3e1817b6ce1a6b703928e113a90442 (diff)
parentb10733527bfd864605c33ab2e9a886eec317ec39 (diff)
downloadlinux-1d36dffa5d887715dacca0f717f4519b7be5e498.tar.gz
linux-1d36dffa5d887715dacca0f717f4519b7be5e498.tar.bz2
linux-1d36dffa5d887715dacca0f717f4519b7be5e498.zip
Merge tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Not a huge amount of big things here, AMD has support for a few new HW variants (vangogh, green sardine, dimgrey cavefish), Intel has some more DG1 enablement. We have a few big reworks of the TTM layers and interfaces, GEM and atomic internal API reworks cross tree. fbdev is marked orphaned in here as well to reflect the current reality. core: - documentation updates - deprecate DRM_FORMAT_MOD_NONE - atomic crtc enable/disable rework - GEM convert drivers to gem object functions - remove SCATTER_LIST_MAX_SEGMENT sched: - avoid infinite waits ttm: - remove AGP support - don't modify caching for swapout - ttm pinning rework - major TTM reworks - new backend allocator - multihop support vram-helper: - top down BO placement fix - TTM changes - GEM object support displayport: - DP 2.0 DPCD prep work - DP MST extended DPCD caps fbdev: - mark as orphaned amdgpu: - Initial Vangogh support - Green Sardine support - Dimgrey Cavefish support - SG display support for renoir - SMU7 improvements - gfx9+ modiifier support - CI BACO fixes radeon: - expose voltage via hwmon on SUMO amdkfd: - fix unique id handling i915: - more DG1 enablement - bigjoiner support - integer scaling filter support - async flip support - ICL+ DSI command mode - Improve display shutdown - Display refactoring - eLLC machine fbdev loading fix - dma scatterlist fixes - TGL hang fixes - eLLC display buffer caching on SKL+ - MOCS PTE seeting for gen9+ msm: - Shutdown hook - GPU cooling device support - DSI 7nm and 10nm phy/pll updates - sm8150/sm2850 DPU support - GEM locking re-work - LLCC system cache support aspeed: - sysfs output config support ast: - LUT fix - new display mode gma500: - remove 2d framebuffer accel panfrost: - move gpu reset to a worker exynos: - new HDMI mode support mediatek: - MT8167 support - yaml bindings - MIPI DSI phy code moved etnaviv: - new perf counter - more lockdep annotation hibmc: - i2c DDC support ingenic: - pixel clock reset fix - reserved memory support - allow both DMA channels at once - different pixel format support - 30/24/8-bit palette modes tilcdc: - don't keep vblank irq enabled vc4: - new maintainer added - DSI registration fix virtio: - blob resource support - host visible and cross-device support - uuid api support" * tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm: (1754 commits) drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs drm/amdgpu: fix size calculation with stolen vga memory drm/amdgpu: remove amdgpu_ttm_late_init and amdgpu_bo_late_init drm/amdgpu: free the pre-OS console framebuffer after the first modeset drm/amdgpu: enable runtime pm using BACO on CI dGPUs drm/amdgpu/cik: enable BACO reset on Bonaire drm/amd/pm: update smu10.h WORKLOAD_PPLIB setting for raven drm/amd/pm: remove one unsupported smu function for vangogh drm/amd/display: setup system context for APUs drm/amd/display: add S/G support for Vangogh drm/amdkfd: Fix leak in dmabuf import drm/amdgpu: use AMDGPU_NUM_VMID when possible drm/amdgpu: fix sdma instance fw version and feature version init drm/amd/pm: update driver if version for dimgrey_cavefish drm/amd/display: 3.2.115 drm/amd/display: [FW Promotion] Release 0.0.45 drm/amd/display: Revert DCN2.1 dram_clock_change_latency update drm/amd/display: Enable gpu_vm_support for dcn3.01 drm/amd/display: Fixed the audio noise during mode switching with HDCP mode on drm/amd/display: Add wm table for Renoir ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c68
1 files changed, 30 insertions, 38 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index cf6e47adfde6..d8cac4c5881f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -570,6 +570,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
ee->vm_info.pp_dir_base);
}
}
+ err_printf(m, " hung: %u\n", ee->hung);
err_printf(m, " engine reset count: %u\n", ee->reset_count);
for (n = 0; n < ee->num_ports; n++) {
@@ -1026,6 +1027,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
dma_addr_t dma;
for_each_sgt_daddr(dma, iter, vma->pages) {
+ mutex_lock(&ggtt->error_mutex);
ggtt->vm.insert_page(&ggtt->vm, dma, slot,
I915_CACHE_NONE, 0);
mb();
@@ -1035,6 +1037,10 @@ i915_vma_coredump_create(const struct intel_gt *gt,
(void __force *)s, dst,
true);
io_mapping_unmap(s);
+
+ mb();
+ ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
+ mutex_unlock(&ggtt->error_mutex);
if (ret)
break;
}
@@ -1451,6 +1457,7 @@ capture_engine(struct intel_engine_cs *engine,
static void
gt_record_engines(struct intel_gt_coredump *gt,
+ intel_engine_mask_t engine_mask,
struct i915_vma_compress *compress)
{
struct intel_engine_cs *engine;
@@ -1466,6 +1473,8 @@ gt_record_engines(struct intel_gt_coredump *gt,
if (!ee)
continue;
+ ee->hung = engine->mask & engine_mask;
+
gt->simulated |= ee->simulated;
if (ee->simulated) {
kfree(ee);
@@ -1505,25 +1514,6 @@ gt_record_uc(struct intel_gt_coredump *gt,
return error_uc;
}
-static void gt_capture_prepare(struct intel_gt_coredump *gt)
-{
- struct i915_ggtt *ggtt = gt->_gt->ggtt;
-
- mutex_lock(&ggtt->error_mutex);
-}
-
-static void gt_capture_finish(struct intel_gt_coredump *gt)
-{
- struct i915_ggtt *ggtt = gt->_gt->ggtt;
-
- if (drm_mm_node_allocated(&ggtt->error_capture))
- ggtt->vm.clear_range(&ggtt->vm,
- ggtt->error_capture.start,
- PAGE_SIZE);
-
- mutex_unlock(&ggtt->error_mutex);
-}
-
/* Capture all registers which don't fit into another category. */
static void gt_record_regs(struct intel_gt_coredump *gt)
{
@@ -1669,24 +1659,25 @@ static u32 generate_ecode(const struct intel_engine_coredump *ee)
static const char *error_msg(struct i915_gpu_coredump *error)
{
struct intel_engine_coredump *first = NULL;
+ unsigned int hung_classes = 0;
struct intel_gt_coredump *gt;
- intel_engine_mask_t engines;
int len;
- engines = 0;
for (gt = error->gt; gt; gt = gt->next) {
struct intel_engine_coredump *cs;
- if (gt->engine && !first)
- first = gt->engine;
-
- for (cs = gt->engine; cs; cs = cs->next)
- engines |= cs->engine->mask;
+ for (cs = gt->engine; cs; cs = cs->next) {
+ if (cs->hung) {
+ hung_classes |= BIT(cs->engine->uabi_class);
+ if (!first)
+ first = cs;
+ }
+ }
}
len = scnprintf(error->error_msg, sizeof(error->error_msg),
"GPU HANG: ecode %d:%x:%08x",
- INTEL_GEN(error->i915), engines,
+ INTEL_GEN(error->i915), hung_classes,
generate_ecode(first));
if (first && first->context.pid) {
/* Just show the first executing process, more is confusing */
@@ -1782,8 +1773,6 @@ i915_vma_capture_prepare(struct intel_gt_coredump *gt)
return NULL;
}
- gt_capture_prepare(gt);
-
return compress;
}
@@ -1793,14 +1782,14 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
if (!compress)
return;
- gt_capture_finish(gt);
-
compress_fini(compress);
kfree(compress);
}
-struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
+struct i915_gpu_coredump *
+i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
+ struct drm_i915_private *i915 = gt->i915;
struct i915_gpu_coredump *error;
/* Check if GPU capture has been disabled */
@@ -1812,7 +1801,7 @@ struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
if (!error)
return ERR_PTR(-ENOMEM);
- error->gt = intel_gt_coredump_alloc(&i915->gt, ALLOW_FAIL);
+ error->gt = intel_gt_coredump_alloc(gt, ALLOW_FAIL);
if (error->gt) {
struct i915_vma_compress *compress;
@@ -1824,7 +1813,7 @@ struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915)
}
gt_record_info(error->gt);
- gt_record_engines(error->gt, compress);
+ gt_record_engines(error->gt, engine_mask, compress);
if (INTEL_INFO(i915)->has_gt_uc)
error->gt->uc = gt_record_uc(error->gt, compress);
@@ -1871,20 +1860,23 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
/**
* i915_capture_error_state - capture an error record for later analysis
- * @i915: i915 device
+ * @gt: intel_gt which originated the hang
+ * @engine_mask: hung engines
+ *
*
* Should be called when an error is detected (either a hang or an error
* interrupt) to capture error state from the time of the error. Fills
* out a structure which becomes available in debugfs for user level tools
* to pick up.
*/
-void i915_capture_error_state(struct drm_i915_private *i915)
+void i915_capture_error_state(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
{
struct i915_gpu_coredump *error;
- error = i915_gpu_coredump(i915);
+ error = i915_gpu_coredump(gt, engine_mask);
if (IS_ERR(error)) {
- cmpxchg(&i915->gpu_error.first_error, NULL, error);
+ cmpxchg(&gt->i915->gpu_error.first_error, NULL, error);
return;
}