diff options
| author | Dave Airlie <airlied@redhat.com> | 2023-08-07 13:49:24 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2023-08-07 13:49:25 +1000 |
| commit | d9aa1da9a8cfb0387eb5703c15bd1f54421460ac (patch) | |
| tree | fd64841d76f3408ed5d25f4d9aaa5088a752e2fd | |
| parent | 7c9aa0f7463eede3226daf06ff7ccbb813f8b739 (diff) | |
| parent | 28e671114fb0f28f334fac8d0a6b9c395c7b0498 (diff) | |
| download | linux-d9aa1da9a8cfb0387eb5703c15bd1f54421460ac.tar.gz linux-d9aa1da9a8cfb0387eb5703c15bd1f54421460ac.tar.bz2 linux-d9aa1da9a8cfb0387eb5703c15bd1f54421460ac.zip | |
Merge tag 'drm-intel-gt-next-2023-08-04' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes:
- Avoid infinite GPU waits by avoidin premature release of request's
reusable memory (Chris, Janusz)
- Expose RPS thresholds in sysfs (Tvrtko)
- Apply GuC SLPC min frequency softlimit correctly (Vinay)
- Restore SLPC efficient freq earlier (Vinay)
- Consider OA buffer boundary when zeroing out reports (Umesh)
- Extend Wa_14015795083 to TGL, RKL, DG1 and ADL (Matt R)
- Fix context workarounds with non-masked regs on MTL/DG2 (Lucas)
- Enable the CCS_FLUSH bit in the pipe control and in the CS for MTL+ (Andi)
- Update MTL workarounds 14018778641, 22016122933 (Tejas, Zhanjun)
- Ensure memory quiesced before AUX CCS invalidation (Jonathan)
- Add a gsc_info debugfs (Daniele)
- Invalidate the TLBs on each GT on multi-GT device (Chris)
- Fix a VMA UAF for multi-gt platform (Nirmoy)
- Do not use stolen on MTL due to HW bug (Nirmoy)
- Check HuC and GuC version compatibility on MTL (Daniele)
- Dump perf_limit_reasons for slow GuC init debug (Vinay)
- Replace kmap() with kmap_local_page() (Sumitra, Ira)
- Add sentinel to xehp_oa_b_counters for KASAN (Andrzej)
- Add the gen12_needs_ccs_aux_inv helper (Andi)
- Fixes and updates for GSC memory allocation (Daniele)
- Fix one wrong caching mode enum usage (Tvrtko)
- Fixes for GSC wakeref (Alan)
- Static checker fixes (Harshit, Arnd, Dan, Cristophe, David, Andi)
- Rename flags with bit_group_X according to the datasheet (Andi)
- Use direct alias for i915 in requests (Andrzej)
- Replace i915->gt0 with to_gt(i915) (Andi)
- Use the i915_vma_flush_writes helper (Tvrtko)
- Selftest improvements (Alan)
- Remove dead code (Tvrtko)
Signed-off-by: Dave Airlie <airlied@redhat.com>
# Conflicts:
# drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZMy6kDd9npweR4uy@jlahtine-mobl.ger.corp.intel.com
65 files changed, 1529 insertions, 531 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index dcbda9ba32dd..79f65eff6bb2 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -132,6 +132,7 @@ gt-y += \ gt/intel_sseu.o \ gt/intel_sseu_debugfs.o \ gt/intel_timeline.o \ + gt/intel_tlb.o \ gt/intel_wopcm.o \ gt/intel_workarounds.o \ gt/shmem_utils.o \ @@ -197,7 +198,8 @@ i915-y += \ gt/uc/intel_gsc_fw.o \ gt/uc/intel_gsc_proxy.o \ gt/uc/intel_gsc_uc.o \ - gt/uc/intel_gsc_uc_heci_cmd_submit.o\ + gt/uc/intel_gsc_uc_debugfs.o \ + gt/uc/intel_gsc_uc_heci_cmd_submit.o \ gt/uc/intel_guc.o \ gt/uc/intel_guc_ads.o \ gt/uc/intel_guc_capture.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index dfaaa8b66ac3..ffddec1d2a76 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -68,10 +68,8 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) switch (obj->write_domain) { case I915_GEM_DOMAIN_GTT: spin_lock(&obj->vma.lock); - for_each_ggtt_vma(vma, obj) { - if (i915_vma_unset_ggtt_write(vma)) - intel_gt_flush_ggtt_writes(vma->vm->gt); - } + for_each_ggtt_vma(vma, obj) + i915_vma_flush_writes(vma); spin_unlock(&obj->vma.lock); i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cfd7929587d8..5a687a3686bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2229,8 +2229,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) u32 *cs; int i; - if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) { - drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n"); + if (GRAPHICS_VER(rq->i915) != 7 || rq->engine->id != RCS0) { + drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -2691,6 +2691,7 @@ static int eb_select_engine(struct i915_execbuffer *eb) { struct intel_context *ce, *child; + struct intel_gt *gt; unsigned int idx; int err; @@ -2714,10 +2715,17 @@ eb_select_engine(struct i915_execbuffer *eb) } } eb->num_batches = ce->parallel.number_children + 1; + gt = ce->engine->gt; for_each_child(ce, child) intel_context_get(child); - intel_gt_pm_get(ce->engine->gt); + intel_gt_pm_get(gt); + /* + * Keep GT0 active on MTL so that i915_vma_parked() doesn't + * free VMAs while execbuf ioctl is validating VMAs. + */ + if (gt->info.id) + intel_gt_pm_get(to_gt(gt->i915)); if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { err = intel_context_alloc_state(ce); @@ -2756,7 +2764,10 @@ eb_select_engine(struct i915_execbuffer *eb) return err; err: - intel_gt_pm_put(ce->engine->gt); + if (gt->info.id) + intel_gt_pm_put(to_gt(gt->i915)); + + intel_gt_pm_put(gt); for_each_child(ce, child) intel_context_put(child); intel_context_put(ce); @@ -2769,6 +2780,12 @@ eb_put_engine(struct i915_execbuffer *eb) struct intel_context *child; i915_vm_put(eb->context->vm); + /* + * This works in conjunction with eb_select_engine() to prevent + * i915_vma_parked() from interfering while execbuf validates vmas. + */ + if (eb->gt->info.id) + intel_gt_pm_put(to_gt(eb->gt->i915)); intel_gt_pm_put(eb->gt); for_each_child(eb->context, child) intel_context_put(child); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 3de7db70f4ed..2292404007c8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -17,6 +17,8 @@ #include "i915_selftest.h" #include "i915_vma_resource.h" +#include "gt/intel_gt_defines.h" + struct drm_i915_gem_object; struct intel_fronbuffer; struct intel_memory_region; @@ -675,7 +677,7 @@ struct drm_i915_gem_object { */ bool dirty:1; - u32 tlb; + u32 tlb[I915_MAX_GT]; } mm; struct { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 89fc8ea6bcfc..eae40e8f52ed 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -7,7 +7,7 @@ #include <drm/drm_cache.h> #include "gt/intel_gt.h" -#include "gt/intel_gt_pm.h" +#include "gt/intel_tlb.h" #include "i915_drv.h" #include "i915_gem_object.h" @@ -193,13 +193,16 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) static void flush_tlb_invalidate(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + int id; - if (!obj->mm.tlb) - return; + for_each_gt(gt, i915, id) { + if (!obj->mm.tlb[id]) + return; - intel_gt_invalidate_tlb(gt, obj->mm.tlb); - obj->mm.tlb = 0; + intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]); + obj->mm.tlb[id] = 0; + } } struct sg_table * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 3b094d36a0b0..5b0a5cf9a98a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -892,7 +892,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, } else { resource_size_t lmem_range; - lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF; + lmem_range = intel_gt_mcr_read_any(to_gt(i915), XEHP_TILE0_ADDR_RANGE) & 0xFFFF; lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT; lmem_size *= SZ_1G; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index df6c9a84252c..6b9f6cf50bf6 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1246,8 +1246,10 @@ static int igt_write_huge(struct drm_i915_private *i915, * times in succession a possibility by enlarging the permutation array. */ order = i915_random_order(count * count, &prng); - if (!order) - return -ENOMEM; + if (!order) { + err = -ENOMEM; + goto out; + } max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); max = div_u64(max - size, max_page_size); diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c index 1c82caf525c3..8fe0499308ff 100644 --- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c @@ -76,7 +76,7 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode) cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) { cmd |= MI_EXE_FLUSH; - if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5) + if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5) cmd |= MI_INVALIDATE_ISP; } diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 23857cc08eca..8a3cbb01bcbe 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -39,11 +39,11 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL * pipe control. */ - if (GRAPHICS_VER(rq->engine->i915) == 9) + if (GRAPHICS_VER(rq->i915) == 9) vf_flush_wa = true; /* WaForGAMHang:kbl */ - if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0)) + if (IS_KBL_GRAPHICS_STEP(rq->i915, 0, STEP_C0)) dc_flush_wa = true; } @@ -165,14 +165,60 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } -u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg) +static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine) { - u32 gsi_offset = gt->uncore->gsi_offset; + switch (engine->id) { + case RCS0: + return GEN12_CCS_AUX_INV; + case BCS0: + return GEN12_BCS0_AUX_INV; + case VCS0: + return GEN12_VD0_AUX_INV; + case VCS2: + return GEN12_VD2_AUX_INV; + case VECS0: + return GEN12_VE0_AUX_INV; + case CCS0: + return GEN12_CCS0_AUX_INV; + default: + return INVALID_MMIO_REG; + } +} + +static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine) +{ + i915_reg_t reg = gen12_get_aux_inv_reg(engine); + + if (IS_PONTEVECCHIO(engine->i915)) + return false; + + /* + * So far platforms supported by i915 having flat ccs do not require + * AUX invalidation. Check also whether the engine requires it. + */ + return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915); +} + +u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) +{ + i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine); + u32 gsi_offset = engine->gt->uncore->gsi_offset; + + if (!gen12_needs_ccs_aux_inv(engine)) + return cs; *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN; *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; *cs++ = AUX_INV; - *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_REGISTER_POLL | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset; + *cs++ = 0; + *cs++ = 0; return cs; } @@ -180,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) { + if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) || + IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) { struct intel_engine_cs *engine = rq->engine; - if (mode & EMIT_FLUSH) { - u32 flags = 0; + /* + * On Aux CCS platforms the invalidation of the Aux + * table requires quiescing memory traffic beforehand + */ + if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) { + u32 bit_group_0 = 0; + u32 bit_group_1 = 0; int err; u32 *cs; @@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) if (err) return err; - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; - flags |= PIPE_CONTROL_FLUSH_L3; - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH; + + /* + * When required, in MTL and beyond platforms we + * need to set the CCS_FLUSH bit in the pipe control + */ + if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70)) + bit_group_0 |= PIPE_CONTROL_CCS_FLUSH; + + bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_FLUSH_L3; + bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl,adl-p */ - flags |= PIPE_CONTROL_DEPTH_STALL; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; + bit_group_1 |= PIPE_CONTROL_DEPTH_STALL; + bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE; + bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; + bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX; + bit_group_1 |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_CS_STALL; + bit_group_1 |= PIPE_CONTROL_CS_STALL; if (!HAS_3D_PIPELINE(engine->i915)) - flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS; else if (engine->class == COMPUTE_CLASS) - flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; + bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen12_emit_pipe_control(cs, - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1, + LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(rq, cs); } @@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) else if (engine->class == COMPUTE_CLASS) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; - if (!HAS_FLAT_CCS(rq->engine->i915)) - count = 8 + 4; - else - count = 8; + count = 8; + if (gen12_needs_ccs_aux_inv(rq->engine)) + count += 8; cs = intel_ring_begin(rq, count); if (IS_ERR(cs)) @@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - if (!HAS_FLAT_CCS(rq->engine->i915)) { - /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_GFX_CCS_AUX_NV); - } + cs = gen12_emit_aux_table_inv(engine, cs); *cs++ = preparser_disable(false); intel_ring_advance(rq, cs); @@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) { - intel_engine_mask_t aux_inv = 0; - u32 cmd, *cs; + u32 cmd = 4; + u32 *cs; - cmd = 4; if (mode & EMIT_INVALIDATE) { cmd += 2; - if (!HAS_FLAT_CCS(rq->engine->i915) && - (rq->engine->class == VIDEO_DECODE_CLASS || - rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) { - aux_inv = rq->engine->mask & - ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0); - if (aux_inv) - cmd += 4; - } + if (gen12_needs_ccs_aux_inv(rq->engine)) + cmd += 8; } cs = intel_ring_begin(rq, cmd); @@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) cmd |= MI_INVALIDATE_TLB; if (rq->engine->class == VIDEO_DECODE_CLASS) cmd |= MI_INVALIDATE_BSD; + + if (gen12_needs_ccs_aux_inv(rq->engine) && + rq->engine->class == COPY_ENGINE_CLASS) + cmd |= MI_FLUSH_DW_CCS; } *cs++ = cmd; @@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) *cs++ = 0; /* upper addr */ *cs++ = 0; /* value */ - if (aux_inv) { /* hsdes: 1809175790 */ - if (rq->engine->class == VIDEO_DECODE_CLASS) - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VD0_AUX_NV); - else - cs = gen12_emit_aux_table_inv(rq->engine->gt, - cs, GEN12_VE0_AUX_NV); - } + cs = gen12_emit_aux_table_inv(rq->engine, cs); if (mode & EMIT_INVALIDATE) *cs++ = preparser_disable(false); @@ -754,7 +798,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { - struct drm_i915_private *i915 = rq->engine->i915; + struct drm_i915_private *i915 = rq->i915; u32 flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | < |
