// SPDX-License-Identifier: MIT
/*
* Copyright © 2008-2018 Intel Corporation
*/
#include <linux/sched/mm.h>
#include <linux/stop_machine.h>
#include <linux/string_helpers.h>
#include "display/intel_display_reset.h"
#include "display/intel_overlay.h"
#include "gem/i915_gem_context.h"
#include "gt/intel_gt_regs.h"
#include "i915_drv.h"
#include "i915_file_private.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "intel_breadcrumbs.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
#include "intel_mchbar_regs.h"
#include "intel_pci_config.h"
#include "intel_reset.h"
#include "uc/intel_guc.h"
#define RESET_MAX_RETRIES 3
/* XXX How to handle concurrent GGTT updates using tiling registers? */
#define RESET_UNDER_STOP_MACHINE 0
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
struct drm_i915_file_private *file_priv = ctx->file_priv;
unsigned long prev_hang;
unsigned int score;
if (IS_ERR_OR_NULL(file_priv))
return;
score = 0;
if (banned)
score = I915_CLIENT_SCORE_CONTEXT_BAN;
prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
score += I915_CLIENT_SCORE_HANG_FAST;
if (score) {
atomic_add(score, &file_priv->ban_score);
drm_dbg(&ctx->i915->drm,
"client %s: gained %u ban score, now %u\n",
ctx->name, score,
atomic_read(&file_priv->ban_score));
}
}
static bool mark_guilty(struct i915_request *rq)
{
struct i915_gem_context *ctx;
unsigned long prev_hang;
bool banned;
int i;
if (intel_context_is_closed(rq->context))
return true;
rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context);
if (ctx && !kref_get_unless_zero(&ctx->ref))
ctx = NULL;
rcu_read_unlock();
if (!ctx)
return intel_context_is_banned(rq->context);
atomic_inc(&ctx->guilty_count);
/* Cool contexts are too cool to be banned! (Used for reset testing.) */
if (!i915_gem_context_is_bannable(ctx)) {
banned = false;
goto out;
}
drm_notice(&ctx->i915->drm,
"%s context reset due to GPU hang\n",
ctx->name);
/* Record the timestamp for the last N hangs */
prev_hang = ctx->hang_timestamp[0];
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
ctx->hang_timestamp[i] = jiffies;
/* If we have hung N+1 times in rapid succession, we ban the context! */
banned = !i915_gem_context_is_recoverable(ctx);
if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
banned = true;
if (banned)
drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n",
ctx->name, atomic_read(&ctx->guilty_count));
client_mark_guilty(ctx, banned);
out:
i915_gem_context_put(ctx);
return banned;
}
static void mark_innocent(struct i915_request *rq)
{
struct i915_gem_context *ctx;
rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context);
if (ctx)
atomic_inc(&ctx->active_count);
rcu_read_unlock();
}
void __i915_request_reset(struct i915_request *rq, bool guilty)
{
bool banned = false;
RQ_TRACE(rq, "guilty? %s\n", str_yes_no(guilty));
GEM_BUG_ON(__i915_request_is_complete(rq));
rcu_read_lock(); /* protect the GEM context */
if (guilty) {
i915_request_set_error_once(rq, -EIO);
__i915_request_skip(rq);
banned = mark_guilty(rq);
} else {
i915_request_set_error_once(rq, -EAGAIN);
mark_innocent(rq);
}
rcu_read_unlock();
if (banned)
intel_context_ban(rq->context, rq);
}
static bool i915_in_reset(struct pci_dev *pdev)
{
u8 gdrst;
pci_read_config_byte(pdev, I915_GDRST, &gdrst);
return gdrst & GRDOM_RESET_STATUS;
}
static int i915_do_reset(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
int err;
/* Assert reset for at least 20 usec, and wait for acknowledgement. */
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
udelay(50);
err = wait_for_atomic(i915_in_reset(pdev), 50);
/* Clear the reset request. */
pci_write_config_byte(pdev, I915_GDRST, 0);
udelay(50);
if (!err)
err = wait_for_atomic(!i915_in_reset(pdev), 50);
return err;
}
static bool g4x_reset_complete(struct pci_dev *pdev)
{
u8 gdrst;
pci_read_config_byte(pdev, I915_GDRST, &gdrst);
return (gdrst & GRDOM_RESET_ENABLE) == 0;
}
static int g33_do_reset(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
return wait_for_atomic(g4x_reset_complete(pdev), 50);
}
static int g4x_do_reset(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
struct intel_uncore *uncore = gt->uncore;
int ret;
/* WaVcpClkGateDisableForMediaReset:ctg,elk */
intel_uncore_rmw_fw(uncore, VDECCLK_GATE_D, 0, VCP_UNIT_CLOCK_GATE_DISABLE);
intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_MEDIA | GRDOM_RESET_ENABLE);
ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) {
GT_TRACE(gt, "Wait for media reset failed\n");
goto out;
}
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_RENDER | GRDOM_RESET_ENABLE);
ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) {
GT_TRACE(gt, "Wait for render reset failed\n");
goto out;
}
out:
pci_write_config_byte(pdev, I915_GDRST, 0);
intel_uncore_rmw_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE, 0);
intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
return ret;
}
static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
unsigned int retry)
{
struct intel_uncore *
|