diff options
author | Dave Airlie <airlied@redhat.com> | 2022-08-26 10:01:44 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-08-26 10:03:43 +1000 |
commit | 2c2d7a67defa198a8b8148dbaddc9e5554efebc8 (patch) | |
tree | 2906871b6c5426ea9e4b845f779cc17d3be30491 /drivers | |
parent | 1c23f9e627a7b412978b4e852793c5e3c3efc555 (diff) | |
parent | 5ece208ab05e4042c80ed1e6fe6d7ce236eee89b (diff) | |
download | linux-2c2d7a67defa198a8b8148dbaddc9e5554efebc8.tar.gz linux-2c2d7a67defa198a8b8148dbaddc9e5554efebc8.tar.bz2 linux-2c2d7a67defa198a8b8148dbaddc9e5554efebc8.zip |
Merge tag 'drm-intel-gt-next-2022-08-24' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- Create gt/gtN/.defaults/ for per gt sysfs defaults
Create a gt/gtN/.defaults/ directory (similar to
engine/<engine-name>/.defaults/) to expose default parameter values for
each gt in sysfs. This allows userspace to restore default parameter values
after they have changed.
Driver Changes:
- Support GuC v69 in parallel to v70 (Daniele)
- Improve TLB invalidation to limit performance regression (Chris, Mauro)
- Expose per-gt RPS defaults in sysfs (Ashutosh)
- Suppress OOM warning for shmemfs object allocation failure (Chris, Nirmoy)
- Disable PCI resize on 32-bit machines (Nirmoy)
- Update DG2 to GuC v70.4.1 (John)
- Fix CCS data copying on DG2 during swapping (Matt A)
- Add DG2 performance tuning setting recommended by spec (Matt R)
- Add GuC <-> kernel time stamp translation information to error logs (John)
- Record GuC CTB info in error logs (John)
- Route semaphores to GuC for Gen12+ when enabled (Michal Wi, John)
- Improve resilency to bug #3575: Handle reset timeouts under unrelated kernel hangs (Chris, Ashutosh)
- Avoid system freeze by removing shared locking on freeing objects (Chris, Nirmoy)
- Demote GuC error "No response for request" into debug when expected (Zhanjun)
- Fix GuC capture size warning and bump the size (John)
- Use streaming loads to speed up dumping the GuC log (Chris, John)
- Don't abort on CTB_UNUSED status from GuC (John)
- Don't send spurious policy update for GuC child contexts (Daniele)
- Don't leak the CCS state (Matt A)
- Prefer drm_err over pr_err (John)
- Eliminate unused calc_ctrl_surf_instr_size (Matt A)
- Add dedicated function for non-ctx register tuning settings (Matt R)
- Style and typo fixes, documentation improvements (Jason Wang, Mauro)
- Selftest improvements (Matt B, Rahul, John)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YwYTCjA/Rhpd1n4A@jlahtine-mobl.ger.corp.intel.com
Diffstat (limited to 'drivers')
34 files changed, 627 insertions, 228 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..f42ca1179f37 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -75,7 +75,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, if (size > resource_size(&mr->region)) return -ENOMEM; - if (sg_alloc_table(st, page_count, GFP_KERNEL)) + if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN)) return -ENOMEM; /* @@ -137,7 +137,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, * trigger the out-of-memory killer and for * this we want __GFP_RETRY_MAYFAIL. */ - gfp |= __GFP_RETRY_MAYFAIL; + gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN; } } while (1); @@ -209,7 +209,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); rebuild_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); + st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN); if (!st) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index a334787a4939..6c9a46452364 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -55,6 +55,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) for (tmp = 1, intel_gt_pm_get(gt); tmp; \ intel_gt_pm_put(gt), tmp = 0) +/** + * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent + * it to sleep, run some code and then asynchrously put the reference + * away. + * + * @gt: pointer to the gt + * @wf: pointer to a temporary wakeref. + */ #define with_intel_gt_pm_if_awake(gt, wf) \ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 60d6eb5f245b..94f9ddcfb3a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -259,6 +259,9 @@ #define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) #define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) +#define DRAW_WATERMARK _MMIO(0x26c0) +#define VERT_WM_VAL REG_GENMASK(9, 0) + #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define RENDER_HWS_PGA_GEN7 _MMIO(0x4080) @@ -374,6 +377,9 @@ #define CHICKEN_RASTER_1 _MMIO(0x6204) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) +#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define TBIMR_FAST_CLIP REG_BIT(5) + #define VFLSKPD _MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -1007,6 +1013,8 @@ #define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) #define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) +#define GUCPMTIMESTAMP _MMIO(0xc3e8) + #define __GEN9_RCS0_MOCS0 0xc800 #define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4) #define __GEN9_VCS0_MOCS0 0xc900 @@ -1078,6 +1086,7 @@ #define GEN10_SAMPLER_MODE _MMIO(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) +#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) @@ -1123,6 +1132,8 @@ #define RT_CTRL _MMIO(0xe530) #define DIS_NULL_QUERY REG_BIT(10) +#define STACKID_CTRL REG_GENMASK(6, 5) +#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2) #define EU_PERF_CNTL1 _MMIO(0xe558) #define EU_PERF_CNTL5 _MMIO(0xe55c) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index 9e4ebf53379b..d651ccd0ab20 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -22,11 +22,6 @@ bool is_object_gt(struct kobject *kobj) return !strncmp(kobj->name, "gt", 2); } -static struct intel_gt *kobj_to_gt(struct kobject *kobj) -{ - return container_of(kobj, struct intel_gt, sysfs_gt); -} - struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, const char *name) { @@ -101,6 +96,10 @@ void intel_gt_sysfs_register(struct intel_gt *gt) gt->i915->sysfs_gt, "gt%d", gt->info.id)) goto exit_fail; + gt->sysfs_defaults = kobject_create_and_add(".defaults", >->sysfs_gt); + if (!gt->sysfs_defaults) + goto exit_fail; + intel_gt_sysfs_pm_init(gt, >->sysfs_gt); return; @@ -113,5 +112,6 @@ exit_fail: void intel_gt_sysfs_unregister(struct intel_gt *gt) { + kobject_put(gt->sysfs_defaults); kobject_put(>->sysfs_gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index a99aa7e8b01a..6232923a420d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -10,6 +10,7 @@ #include <linux/kobject.h> #include "i915_gem.h" /* GEM_BUG_ON() */ +#include "intel_gt_types.h" struct intel_gt; @@ -22,6 +23,11 @@ intel_gt_create_kobj(struct intel_gt *gt, struct kobject *dir, const char *name); +static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) +{ + return container_of(kobj, struct intel_gt, sysfs_gt); +} + void intel_gt_sysfs_register(struct intel_gt *gt); void intel_gt_sysfs_unregister(struct intel_gt *gt); struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 73a8b46e0234..e066cc33d9f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -727,6 +727,34 @@ static const struct attribute *media_perf_power_attrs[] = { NULL }; +static ssize_t +default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.min_freq); +} + +static struct kobj_attribute default_min_freq_mhz = +__ATTR(rps_min_freq_mhz, 0444, default_min_freq_mhz_show, NULL); + +static ssize_t +default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.max_freq); +} + +static struct kobj_attribute default_max_freq_mhz = +__ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL); + +static const struct attribute * const rps_defaults_attrs[] = { + &default_min_freq_mhz.attr, + &default_max_freq_mhz.attr, + NULL +}; + static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, const struct attribute * const *attrs) { @@ -776,4 +804,10 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) "failed to create gt%u media_perf_power_attrs sysfs (%pe)\n", gt->info.id, ERR_PTR(ret)); } + + ret = sysfs_create_files(gt->sysfs_defaults, rps_defaults_attrs); + if (ret) + drm_warn(>->i915->drm, + "failed to add gt%u rps defaults (%pe)\n", + gt->info.id, ERR_PTR(ret)); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 3804a583382b..4d56f7d5a3be 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -76,6 +76,11 @@ enum intel_submission_method { INTEL_SUBMISSION_GUC, }; +struct gt_defaults { + u32 min_freq; + u32 max_freq; +}; + struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -251,6 +256,10 @@ struct intel_gt { /* gt/gtN sysfs */ struct kobject sysfs_gt; + + /* sysfs defaults per gt */ + struct gt_defaults defaults; + struct kobject *sysfs_defaults; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 2b10b96b17b5..aaaf1906026c 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -511,44 +511,16 @@ static inline u32 *i915_flush_dw(u32 *cmd, u32 flags) return cmd; } -static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size) -{ - u32 num_cmds, num_blks, total_size; - - if (!GET_CCS_BYTES(i915, size)) - return 0; - - /* - * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte - * blocks. one XY_CTRL_SURF_COPY_BLT command can - * transfer upto 1024 blocks. - */ - num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), - NUM_CCS_BYTES_PER_BLOCK); - num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER); - total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds; - - /* - * Adding a flush before and after XY_CTRL_SURF_COPY_BLT - */ - total_size += 2 * MI_FLUSH_DW_SIZE; - - return total_size; -} - static int emit_copy_ccs(struct i915_request *rq, u32 dst_offset, u8 dst_access, u32 src_offset, u8 src_access, int size) { struct drm_i915_private *i915 = rq->engine->i915; int mocs = rq->engine->gt->mocs.uc_index << 1; - u32 num_ccs_blks, ccs_ring_size; + u32 num_ccs_blks; u32 *cs; - ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); - WARN_ON(!ccs_ring_size); - - cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -583,8 +555,7 @@ static int emit_copy_ccs(struct i915_request *rq, FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); - if (ccs_ring_size & 1) - *cs++ = MI_NOOP; + *cs++ = MI_NOOP; intel_ring_advance(rq, cs); @@ -638,40 +609,38 @@ static int emit_copy(struct i915_request *rq, return 0; } -static int scatter_list_length(struct scatterlist *sg) +static u64 scatter_list_length(struct scatterlist *sg) { - int len = 0; + u64 len = 0; while (sg && sg_dma_len(sg)) { len += sg_dma_len(sg); sg = sg_next(sg); - }; + } return len; } -static void +static int calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, - int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy) + u64 bytes_to_cpy, u64 ccs_bytes_to_cpy) { - if (ccs_bytes_to_cpy) { - if (!src_is_lmem) - /* - * When CHUNK_SZ is passed all the pages upto CHUNK_SZ - * will be taken for the blt. in Flat-ccs supported - * platform Smem obj will have more pages than required - * for main meory hence limit it to the required size - * for main memory - */ - *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ); - } else { /* ccs handling is not required */ - *src_sz = CHUNK_SZ; - } + if (ccs_bytes_to_cpy && !src_is_lmem) + /* + * When CHUNK_SZ is passed all the pages upto CHUNK_SZ + * will be taken for the blt. in Flat-ccs supported + * platform Smem obj will have more pages than required + * for main meory hence limit it to the required size + * for main memory + */ + return min_t(u64, bytes_to_cpy, CHUNK_SZ); + else + return CHUNK_SZ; } -static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy) +static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy) { - u32 len; + u64 len; do { GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg)); @@ -702,12 +671,12 @@ intel_context_migrate_copy(struct intel_context *ce, { struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs; struct drm_i915_private *i915 = ce->engine->i915; - u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; + u64 ccs_bytes_to_cpy = 0, bytes_to_cpy; enum i915_cache_level ccs_cache_level; u32 src_offset, dst_offset; u8 src_access, dst_access; struct i915_request *rq; - int src_sz, dst_sz; + u64 src_sz, dst_sz; bool ccs_is_src, overwrite_ccs; int err; @@ -790,8 +759,8 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - calculate_chunk_sz(i915, src_is_lmem, &src_sz, - bytes_to_cpy, ccs_bytes_to_cpy); + src_sz = calculate_chunk_sz(i915, src_is_lmem, + bytes_to_cpy, ccs_bytes_to_cpy); len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, src_offset, src_sz); diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index c68d36fb5bbd..1211774e1d91 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1281,9 +1281,6 @@ static void intel_gt_reset_global(struct intel_gt *gt, intel_wedge_on_timeout(&w, gt, 5 * HZ) { intel_display_prepare_reset(gt->i915); - /* Flush everyone using a resource about to be clobbered */ - synchronize_srcu_expedited(>->reset.backoff_srcu); - intel_gt_reset(gt, engine_mask, reason); intel_display_finish_reset(gt->i915); @@ -1392,6 +1389,9 @@ void intel_gt_handle_error(struct intel_gt *gt, } } + /* Flush everyone using a resource about to be clobbered */ + synchronize_srcu_expedited(>->reset.backoff_srcu); + intel_gt_reset_global(gt, engine_mask, msg); if (!intel_uc_uses_guc_submission(>->uc)) { diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fb3f57ee450b..c7d381ad90cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1979,7 +1979,9 @@ void intel_rps_init(struct intel_rps *rps) /* Derive initial user preferences/limits from the hardware limits */ rps->max_freq_softlimit = rps->max_freq; + rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit; rps->min_freq_softlimit = rps->min_freq; + rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit; /* After setting max-softlimit, find the overclock max freq */ if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e8111fce56d0..31e129329fb0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_add(wal, @@ -2102,13 +2103,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) /* Wa_1509235366:dg2 */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); - - /* - * The following are not actually "workarounds" but rather - * recommended tuning settings documented in the bspec's - * performance guide section. - */ - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2119,6 +2113,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + /* Wa_1509727124:dg2 */ + wa_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); + } + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ @@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || - IS_DG2_G11(i915)) { - /* Wa_22012654132:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); - } - /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) @@ -2670,6 +2662,49 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } /* + * The bspec performance guide has recommended MMIO tuning settings. These + * aren't truly "workarounds" but we want to program them with the same + * workaround infrastructure to ensure that they're automatically added to + * the GuC save/restore lists, re-applied at the right times, and checked for + * any conflicting programming requested by real workarounds. + * + * Programming settings should be added here only if their registers are not + * part of an engine's register state context. If a register is part of a + * context, then any tuning settings should be programmed in an appropriate + * function invoked by __intel_engine_init_ctx_wa(). + */ +static void +add_render_compute_tuning_settings(struct drm_i915_private *i915, + struct i915_wa_list *wal) +{ + if (IS_PONTEVECCHIO(i915)) { + wa_write(wal, XEHPC_L3SCRUB, + SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + } + + if (IS_DG2(i915)) { + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL, + REG_FIELD_PREP(VERT_WM_VAL, 0x3FF)); + + /* + * This is also listed as Wa_22012654132 for certain DG2 + * steppings, but the tuning setting programming is a superset + * since it applies to all DG2 variants and steppings. + * + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. + */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } +} + +/* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a * specific engine. Since all render+compute engines get reset @@ -2683,14 +2718,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; - if (IS_PONTEVECCHIO(i915)) { - /* - * The following is not actually a "workaround" but rather - * a recommended tuning setting documented in the bspec's - * performance guide section. - */ - wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + add_render_compute_tuning_settings(i915, wal); + if (IS_PONTEVECCHIO(i915)) { /* Wa_16016694945 */ wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 09f8cd2d0e2c..1e08b2473b99 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2077,7 +2077,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) goto out; } - intel_context_set_banned(rq->context); + intel_context_ban(rq->context, rq); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2136,7 +2136,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) if (err) goto out; - intel_context_set_banned(rq[1]->context); + intel_context_ban(rq[1]->context, rq[1]); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2219,7 +2219,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (err) goto out; - intel_context_set_banned(rq[2]->context); + intel_context_ban(rq[2]->context, rq[2]); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2234,7 +2234,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg) goto out; } - if (rq[1]->fence.error != 0) { + /* + * The behavior between having semaphores and not is different. With + * semaphores the subsequent request is on the hardware and not cancelled + * while without the request is held in the driver and cancelled. + */ + if (intel_engine_has_semaphores(rq[1]->engine) && + rq[1]->fence.error != 0) { pr_err("Normal inflight1 request did not complete\n"); err = -EINVAL; goto out; @@ -2282,7 +2288,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) goto out; } - intel_context_set_banned(rq->context); + intel_context_ban(rq->context, rq); err = intel_engine_pulse(arg->engine); /* force reset */ if (err) goto out; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 6493265d5f64..7f3bb1d34dfb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1302,13 +1302,15 @@ static int igt_reset_wait(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; int err; + engine = intel_selftest_find_any_engine(gt); + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1432,7 +1434,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, int (*fn)(void *), unsigned int flags) { - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; @@ -1444,6 +1446,8 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) return 0; + engine = intel_selftest_find_any_engine(gt); + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1819,12 +1823,14 @@ static int igt_handle_error(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct hang h; struct i915_request *rq; struct i915_gpu_coredump *error; int err; + engine = intel_selftest_find_any_engine(gt); + /* Check that we can issue a global GPU and engine reset */ if (!intel_has_reset_engine(gt)) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index df83c1cc7c7a..28b8387f97b7 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -37,6 +37,7 @@ * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * | | | - _`GUC_CTB_STATUS_UNUSED` = 8 (CTB is not in use) | * +---+-------+--------------------------------------------------------------+ * |...| | RESERVED = MBZ | * +---+-------+--------------------------------------------------------------+ @@ -49,9 +50,10 @@ struct guc_ct_buffer_desc { u32 tail; u32 status; #define GUC_CTB_STATUS_NO_ERROR 0 -#define GUC_CTB_STATUS_OVERFLOW (1 |