summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2023-08-07 13:49:24 +1000
committerDave Airlie <airlied@redhat.com>2023-08-07 13:49:25 +1000
commitd9aa1da9a8cfb0387eb5703c15bd1f54421460ac (patch)
treefd64841d76f3408ed5d25f4d9aaa5088a752e2fd
parent7c9aa0f7463eede3226daf06ff7ccbb813f8b739 (diff)
parent28e671114fb0f28f334fac8d0a6b9c395c7b0498 (diff)
downloadlinux-d9aa1da9a8cfb0387eb5703c15bd1f54421460ac.tar.gz
linux-d9aa1da9a8cfb0387eb5703c15bd1f54421460ac.tar.bz2
linux-d9aa1da9a8cfb0387eb5703c15bd1f54421460ac.zip
Merge tag 'drm-intel-gt-next-2023-08-04' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes: - Avoid infinite GPU waits by avoidin premature release of request's reusable memory (Chris, Janusz) - Expose RPS thresholds in sysfs (Tvrtko) - Apply GuC SLPC min frequency softlimit correctly (Vinay) - Restore SLPC efficient freq earlier (Vinay) - Consider OA buffer boundary when zeroing out reports (Umesh) - Extend Wa_14015795083 to TGL, RKL, DG1 and ADL (Matt R) - Fix context workarounds with non-masked regs on MTL/DG2 (Lucas) - Enable the CCS_FLUSH bit in the pipe control and in the CS for MTL+ (Andi) - Update MTL workarounds 14018778641, 22016122933 (Tejas, Zhanjun) - Ensure memory quiesced before AUX CCS invalidation (Jonathan) - Add a gsc_info debugfs (Daniele) - Invalidate the TLBs on each GT on multi-GT device (Chris) - Fix a VMA UAF for multi-gt platform (Nirmoy) - Do not use stolen on MTL due to HW bug (Nirmoy) - Check HuC and GuC version compatibility on MTL (Daniele) - Dump perf_limit_reasons for slow GuC init debug (Vinay) - Replace kmap() with kmap_local_page() (Sumitra, Ira) - Add sentinel to xehp_oa_b_counters for KASAN (Andrzej) - Add the gen12_needs_ccs_aux_inv helper (Andi) - Fixes and updates for GSC memory allocation (Daniele) - Fix one wrong caching mode enum usage (Tvrtko) - Fixes for GSC wakeref (Alan) - Static checker fixes (Harshit, Arnd, Dan, Cristophe, David, Andi) - Rename flags with bit_group_X according to the datasheet (Andi) - Use direct alias for i915 in requests (Andrzej) - Replace i915->gt0 with to_gt(i915) (Andi) - Use the i915_vma_flush_writes helper (Tvrtko) - Selftest improvements (Alan) - Remove dead code (Tvrtko) Signed-off-by: Dave Airlie <airlied@redhat.com> # Conflicts: # drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/ZMy6kDd9npweR4uy@jlahtine-mobl.ger.corp.intel.com
-rw-r--r--drivers/gpu/drm/i915/Makefile4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c25
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c15
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c6
-rw-r--r--drivers/gpu/drm/i915/gt/gen2_engine_cs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c152
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c144
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_defines.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c108
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c26
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ring_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c83
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.c159
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.h29
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c148
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_mocs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rc6.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_binary_headers.h75
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c334
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.h5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c139
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.h21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.c39
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_debugfs.h14
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c22
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c13
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c126
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h4
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c2
-rw-r--r--drivers/gpu/drm/i915/i915_active.c99
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c4
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c3
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h26
-rw-r--r--drivers/gpu/drm/i915/i915_request.c13
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h10
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c15
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.c8
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_gsccs.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf.c2
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_selftest.c31
-rw-r--r--drivers/gpu/drm/i915/selftests/igt_spinner.c14
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c2
65 files changed, 1529 insertions, 531 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dcbda9ba32dd..79f65eff6bb2 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -132,6 +132,7 @@ gt-y += \
gt/intel_sseu.o \
gt/intel_sseu_debugfs.o \
gt/intel_timeline.o \
+ gt/intel_tlb.o \
gt/intel_wopcm.o \
gt/intel_workarounds.o \
gt/shmem_utils.o \
@@ -197,7 +198,8 @@ i915-y += \
gt/uc/intel_gsc_fw.o \
gt/uc/intel_gsc_proxy.o \
gt/uc/intel_gsc_uc.o \
- gt/uc/intel_gsc_uc_heci_cmd_submit.o\
+ gt/uc/intel_gsc_uc_debugfs.o \
+ gt/uc/intel_gsc_uc_heci_cmd_submit.o \
gt/uc/intel_guc.o \
gt/uc/intel_guc_ads.o \
gt/uc/intel_guc_capture.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index dfaaa8b66ac3..ffddec1d2a76 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -68,10 +68,8 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
switch (obj->write_domain) {
case I915_GEM_DOMAIN_GTT:
spin_lock(&obj->vma.lock);
- for_each_ggtt_vma(vma, obj) {
- if (i915_vma_unset_ggtt_write(vma))
- intel_gt_flush_ggtt_writes(vma->vm->gt);
- }
+ for_each_ggtt_vma(vma, obj)
+ i915_vma_flush_writes(vma);
spin_unlock(&obj->vma.lock);
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cfd7929587d8..5a687a3686bd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2229,8 +2229,8 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
u32 *cs;
int i;
- if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) {
- drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n");
+ if (GRAPHICS_VER(rq->i915) != 7 || rq->engine->id != RCS0) {
+ drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n");
return -EINVAL;
}
@@ -2691,6 +2691,7 @@ static int
eb_select_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce, *child;
+ struct intel_gt *gt;
unsigned int idx;
int err;
@@ -2714,10 +2715,17 @@ eb_select_engine(struct i915_execbuffer *eb)
}
}
eb->num_batches = ce->parallel.number_children + 1;
+ gt = ce->engine->gt;
for_each_child(ce, child)
intel_context_get(child);
- intel_gt_pm_get(ce->engine->gt);
+ intel_gt_pm_get(gt);
+ /*
+ * Keep GT0 active on MTL so that i915_vma_parked() doesn't
+ * free VMAs while execbuf ioctl is validating VMAs.
+ */
+ if (gt->info.id)
+ intel_gt_pm_get(to_gt(gt->i915));
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
err = intel_context_alloc_state(ce);
@@ -2756,7 +2764,10 @@ eb_select_engine(struct i915_execbuffer *eb)
return err;
err:
- intel_gt_pm_put(ce->engine->gt);
+ if (gt->info.id)
+ intel_gt_pm_put(to_gt(gt->i915));
+
+ intel_gt_pm_put(gt);
for_each_child(ce, child)
intel_context_put(child);
intel_context_put(ce);
@@ -2769,6 +2780,12 @@ eb_put_engine(struct i915_execbuffer *eb)
struct intel_context *child;
i915_vm_put(eb->context->vm);
+ /*
+ * This works in conjunction with eb_select_engine() to prevent
+ * i915_vma_parked() from interfering while execbuf validates vmas.
+ */
+ if (eb->gt->info.id)
+ intel_gt_pm_put(to_gt(eb->gt->i915));
intel_gt_pm_put(eb->gt);
for_each_child(eb->context, child)
intel_context_put(child);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 3de7db70f4ed..2292404007c8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -17,6 +17,8 @@
#include "i915_selftest.h"
#include "i915_vma_resource.h"
+#include "gt/intel_gt_defines.h"
+
struct drm_i915_gem_object;
struct intel_fronbuffer;
struct intel_memory_region;
@@ -675,7 +677,7 @@ struct drm_i915_gem_object {
*/
bool dirty:1;
- u32 tlb;
+ u32 tlb[I915_MAX_GT];
} mm;
struct {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 89fc8ea6bcfc..eae40e8f52ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -7,7 +7,7 @@
#include <drm/drm_cache.h>
#include "gt/intel_gt.h"
-#include "gt/intel_gt_pm.h"
+#include "gt/intel_tlb.h"
#include "i915_drv.h"
#include "i915_gem_object.h"
@@ -193,13 +193,16 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct intel_gt *gt = to_gt(i915);
+ struct intel_gt *gt;
+ int id;
- if (!obj->mm.tlb)
- return;
+ for_each_gt(gt, i915, id) {
+ if (!obj->mm.tlb[id])
+ return;
- intel_gt_invalidate_tlb(gt, obj->mm.tlb);
- obj->mm.tlb = 0;
+ intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]);
+ obj->mm.tlb[id] = 0;
+ }
}
struct sg_table *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 3b094d36a0b0..5b0a5cf9a98a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -892,7 +892,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
} else {
resource_size_t lmem_range;
- lmem_range = intel_gt_mcr_read_any(&i915->gt0, XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
+ lmem_range = intel_gt_mcr_read_any(to_gt(i915), XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
lmem_size *= SZ_1G;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index df6c9a84252c..6b9f6cf50bf6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1246,8 +1246,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
* times in succession a possibility by enlarging the permutation array.
*/
order = i915_random_order(count * count, &prng);
- if (!order)
- return -ENOMEM;
+ if (!order) {
+ err = -ENOMEM;
+ goto out;
+ }
max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
max = div_u64(max - size, max_page_size);
diff --git a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
index 1c82caf525c3..8fe0499308ff 100644
--- a/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen2_engine_cs.c
@@ -76,7 +76,7 @@ int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
cmd = MI_FLUSH;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_EXE_FLUSH;
- if (IS_G4X(rq->engine->i915) || GRAPHICS_VER(rq->engine->i915) == 5)
+ if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5)
cmd |= MI_INVALIDATE_ISP;
}
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 23857cc08eca..8a3cbb01bcbe 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -39,11 +39,11 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
* pipe control.
*/
- if (GRAPHICS_VER(rq->engine->i915) == 9)
+ if (GRAPHICS_VER(rq->i915) == 9)
vf_flush_wa = true;
/* WaForGAMHang:kbl */
- if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
+ if (IS_KBL_GRAPHICS_STEP(rq->i915, 0, STEP_C0))
dc_flush_wa = true;
}
@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
{
- u32 gsi_offset = gt->uncore->gsi_offset;
+ switch (engine->id) {
+ case RCS0:
+ return GEN12_CCS_AUX_INV;
+ case BCS0:
+ return GEN12_BCS0_AUX_INV;
+ case VCS0:
+ return GEN12_VD0_AUX_INV;
+ case VCS2:
+ return GEN12_VD2_AUX_INV;
+ case VECS0:
+ return GEN12_VE0_AUX_INV;
+ case CCS0:
+ return GEN12_CCS0_AUX_INV;
+ default:
+ return INVALID_MMIO_REG;
+ }
+}
+
+static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
+{
+ i915_reg_t reg = gen12_get_aux_inv_reg(engine);
+
+ if (IS_PONTEVECCHIO(engine->i915))
+ return false;
+
+ /*
+ * So far platforms supported by i915 having flat ccs do not require
+ * AUX invalidation. Check also whether the engine requires it.
+ */
+ return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
+}
+
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+ i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
+ u32 gsi_offset = engine->gt->uncore->gsi_offset;
+
+ if (!gen12_needs_ccs_aux_inv(engine))
+ return cs;
*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
*cs++ = AUX_INV;
- *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_REGISTER_POLL |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
+ *cs++ = 0;
+ *cs++ = 0;
return cs;
}
@@ -180,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
- if (IS_MTL_GRAPHICS_STEP(rq->engine->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(rq->engine->i915, P, STEP_A0, STEP_B0)) {
+ if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) ||
+ IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) {
u32 *cs;
/* dummy PIPE_CONTROL + depth flush */
@@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
- if (mode & EMIT_FLUSH) {
- u32 flags = 0;
+ /*
+ * On Aux CCS platforms the invalidation of the Aux
+ * table requires quiescing memory traffic beforehand
+ */
+ if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
+ u32 bit_group_0 = 0;
+ u32 bit_group_1 = 0;
int err;
u32 *cs;
@@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (err)
return err;
- flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
- flags |= PIPE_CONTROL_FLUSH_L3;
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+ /*
+ * When required, in MTL and beyond platforms we
+ * need to set the CCS_FLUSH bit in the pipe control
+ */
+ if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+ bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+ bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+ bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl,adl-p */
- flags |= PIPE_CONTROL_DEPTH_STALL;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
+ bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
+ bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
+ bit_group_1 |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_CS_STALL;
+ bit_group_1 |= PIPE_CONTROL_CS_STALL;
if (!HAS_3D_PIPELINE(engine->i915))
- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (engine->class == COMPUTE_CLASS)
- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen12_emit_pipe_control(cs,
- PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
- flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
+ LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
else if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- if (!HAS_FLAT_CCS(rq->engine->i915))
- count = 8 + 4;
- else
- count = 8;
+ count = 8;
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ count += 8;
cs = intel_ring_begin(rq, count);
if (IS_ERR(cs))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
- if (!HAS_FLAT_CCS(rq->engine->i915)) {
- /* hsdes: 1809175790 */
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(engine, cs);
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
- intel_engine_mask_t aux_inv = 0;
- u32 cmd, *cs;
+ u32 cmd = 4;
+ u32 *cs;
- cmd = 4;
if (mode & EMIT_INVALIDATE) {
cmd += 2;
- if (!HAS_FLAT_CCS(rq->engine->i915) &&
- (rq->engine->class == VIDEO_DECODE_CLASS ||
- rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
- aux_inv = rq->engine->mask &
- ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
- if (aux_inv)
- cmd += 4;
- }
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ cmd += 8;
}
cs = intel_ring_begin(rq, cmd);
@@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
cmd |= MI_INVALIDATE_TLB;
if (rq->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
+
+ if (gen12_needs_ccs_aux_inv(rq->engine) &&
+ rq->engine->class == COPY_ENGINE_CLASS)
+ cmd |= MI_FLUSH_DW_CCS;
}
*cs++ = cmd;
@@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
- if (aux_inv) { /* hsdes: 1809175790 */
- if (rq->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(rq->engine, cs);
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
@@ -754,7 +798,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
- struct drm_i915_private *i915 = rq->engine->i915;
+ struct drm_i915_private *i915 = rq->i915;
u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
<