Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_workarounds.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_workarounds.c | 1402
1 file changed, 1402 insertions, 0 deletions
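A note on reading the listing below: nearly all of the context workarounds go through WA_SET_BIT_MASKED()/WA_CLR_BIT_MASKED()/WA_SET_FIELD_MASKED(), which target "masked" registers: the upper 16 bits of the written value select which of the lower 16 bits the hardware actually latches, so individual bits can be set or cleared without a read-modify-write. A minimal standalone sketch of that encoding (it mirrors the semantics of the driver's _MASKED_FIELD()/_MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() helpers; the function names here are illustrative stand-ins, not kernel symbols):

#include <stdint.h>
#include <stdio.h>

/* Bits 31:16 select which of bits 15:0 the register latches. */
static uint32_t masked_field(uint32_t mask, uint32_t value)
{
        return (mask << 16) | value;
}

static uint32_t masked_bit_enable(uint32_t bit)
{
        return masked_field(bit, bit);  /* select the bit and set it */
}

static uint32_t masked_bit_disable(uint32_t bit)
{
        return masked_field(bit, 0);    /* select the bit, leave it clear */
}

int main(void)
{
        /* A WA_SET_BIT_MASKED() of bit 4 writes 0x00100010 ... */
        printf("enable  -> %#010x\n", masked_bit_enable(1u << 4));
        /* ... and the matching WA_CLR_BIT_MASKED() writes 0x00100000. */
        printf("disable -> %#010x\n", masked_bit_disable(1u << 4));
        return 0;
}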
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
new file mode 100644
index 000000000000..f46ed0e2f07c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -0,0 +1,1402 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_workarounds.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * This file is intended as a central place to implement most [1]_ of the
+ * required workarounds for hardware to work as originally intended. They fall
+ * in five basic categories depending on how/when they are applied:
+ *
+ * - Workarounds that touch registers that are saved/restored to/from the HW
+ *   context image. The list is emitted (via Load Register Immediate commands)
+ *   every time a new context is created.
+ * - GT workarounds. The list of these WAs is applied whenever these registers
+ *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
+ * - Display workarounds. The list is applied during display clock-gating
+ *   initialization.
+ * - Workarounds that whitelist a privileged register, so that UMDs can manage
+ *   them directly. This is just a special case of an MMIO workaround (as we
+ *   write the list of these to-be-whitelisted registers to some special HW
+ *   registers).
+ * - Workaround batchbuffers, that get executed automatically by the hardware
+ *   on every HW context restore.
+ *
+ * .. [1] Please note that there are other WAs that, due to their nature,
+ *    cannot be applied from a central place. Those are peppered around the rest
+ *    of the code, as needed.
+ *
+ * .. [2] Technically, some registers are power-context saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things. We can revisit this in the future.
+ *
+ * Layout
+ * ''''''
+ *
+ * Keep things in this file ordered by WA type, as per the above (context, GT,
+ * display, register whitelist, batchbuffer). Then, inside each type, keep the
+ * following order:
+ *
+ * - Infrastructure functions and macros
+ * - WAs per platform in standard gen/chrono order
+ * - Public functions to init or apply the given workaround type.
+ */
+
+static void wa_init_start(struct i915_wa_list *wal, const char *name)
+{
+        wal->name = name;
+}
+
+#define WA_LIST_CHUNK (1 << 4)
+
+static void wa_init_finish(struct i915_wa_list *wal)
+{
+        /* Trim unused entries. */
+        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
+                struct i915_wa *list = kmemdup(wal->list,
+                                               wal->count * sizeof(*list),
+                                               GFP_KERNEL);
+
+                if (list) {
+                        kfree(wal->list);
+                        wal->list = list;
+                }
+        }
+
+        if (!wal->count)
+                return;
+
+        DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
+                         wal->wa_count, wal->name);
+}
+
+static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
+{
+        unsigned int addr = i915_mmio_reg_offset(wa->reg);
+        unsigned int start = 0, end = wal->count;
+        const unsigned int grow = WA_LIST_CHUNK;
+        struct i915_wa *wa_;
+
+        GEM_BUG_ON(!is_power_of_2(grow));
+
+        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
+                struct i915_wa *list;
+
+                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
+                                     GFP_KERNEL);
+                if (!list) {
+                        DRM_ERROR("No space for workaround init!\n");
+                        return;
+                }
+
+                if (wal->list)
+                        memcpy(list, wal->list, sizeof(*wa) * wal->count);
+
+                wal->list = list;
+        }
+
+        while (start < end) {
+                unsigned int mid = start + (end - start) / 2;
+
+                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
+                        start = mid + 1;
+                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
+                        end = mid;
+                } else {
+                        wa_ = &wal->list[mid];
+
+                        if ((wa->mask & ~wa_->mask) == 0) {
+                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
+                                          i915_mmio_reg_offset(wa_->reg),
+                                          wa_->mask, wa_->val);
+
+                                wa_->val &= ~wa->mask;
+                        }
+
+                        wal->wa_count++;
+                        wa_->val |= wa->val;
+                        wa_->mask |= wa->mask;
+                        wa_->read |= wa->read;
+                        return;
+                }
+        }
+
+        wal->wa_count++;
+        wa_ = &wal->list[wal->count++];
+        *wa_ = *wa;
+
+        while (wa_-- > wal->list) {
+                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
+                           i915_mmio_reg_offset(wa_[1].reg));
+                if (i915_mmio_reg_offset(wa_[1].reg) >
+                    i915_mmio_reg_offset(wa_[0].reg))
+                        break;
+
+                swap(wa_[1], wa_[0]);
+        }
+}
+
+static void
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+                   u32 val)
+{
+        struct i915_wa wa = {
+                .reg = reg,
+                .mask = mask,
+                .val = val,
+                .read = mask,
+        };
+
+        _wa_add(wal, &wa);
+}
+
+static void
+wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+        wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
+}
+
+static void
+wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+        wa_write_masked_or(wal, reg, ~0, val);
+}
+
+static void
+wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+        wa_write_masked_or(wal, reg, val, val);
+}
+
+static void
+ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
+{
+        struct i915_wa wa = {
+                .reg = reg,
+                .mask = mask,
+                .val = val,
+                /* Bonkers HW, skip verifying */
+        };
+
+        _wa_add(wal, &wa);
+}
+
+#define WA_SET_BIT_MASKED(addr, mask) \
+        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
+
+#define WA_CLR_BIT_MASKED(addr, mask) \
+        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
+
+#define WA_SET_FIELD_MASKED(addr, mask, value) \
+        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
+
+static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+        /* WaDisableAsyncFlipPerfMode:bdw,chv */
+        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
+        /* WaDisablePartialInstShootdown:bdw,chv */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+        /* Use Force Non-Coherent whenever executing a 3D context. This is a
+         * workaround for a possible hang in the unlikely event a TLB
+         * invalidation occurs during a PSD flush.
+         */
+        /* WaForceEnableNonCoherent:bdw,chv */
+        /* WaHdcDisableFetchWhenMasked:bdw,chv */
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+                          HDC_FORCE_NON_COHERENT);
+
+        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
+         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
+         * polygons in the same 8x4 pixel/sample area to be processed without
+         * stalling waiting for the earlier ones to write to Hierarchical Z
+         * buffer."
+         *
+         * This optimization is off by default for BDW and CHV; turn it on.
+         */
+        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+
+        /* Wa4x4STCOptimizationDisable:bdw,chv */
+        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+        /*
+         * BSpec recommends 8x4 when MSAA is used,
+         * however in practice 16x4 seems fastest.
+         *
+         * Note that PS/WM thread counts depend on the WIZ hashing
+         * disable bit, which we don't touch here, but it's good
+         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+         */
+        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+                            GEN6_WIZ_HASHING_MASK,
+                            GEN6_WIZ_HASHING_16x4);
+}
+
+static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        gen8_ctx_workarounds_init(engine);
+
+        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+        /* WaDisableDopClockGating:bdw
+         *
+         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+         * to disable EUTC clock gating.
+         */
+        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+                          DOP_CLOCK_GATING_DISABLE);
+
+        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                          GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          /* WaForceContextSaveRestoreNonCoherent:bdw */
+                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
+                          (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+}
+
+static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        gen8_ctx_workarounds_init(engine);
+
+        /* WaDisableThreadStallDopClockGating:chv */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+        /* Improve HiZ throughput on CHV. */
+        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
+}
+
+static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        if (HAS_LLC(i915)) {
+                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+                 *
+                 * Must match Display Engine. See
+                 * WaCompressedResourceDisplayNewHashMode.
+                 */
+                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
+                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+        }
+
+        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
+        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                          FLOW_CONTROL_ENABLE |
+                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
+        if (!IS_COFFEELAKE(i915))
+                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
+
+        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
+        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+                          GEN9_ENABLE_YV12_BUGFIX |
+                          GEN9_ENABLE_GPGPU_PREEMPTION);
+
+        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
+        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(CACHE_MODE_1,
+                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
+                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+
+        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
+        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+                          GEN9_CCS_TLB_PREFETCH_ENABLE);
+
+        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
+
+        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+         * both tied to WaForceContextSaveRestoreNonCoherent
+         * in some hsds for skl. We keep the tie for all gen9. The
+         * documentation is a bit hazy and so we want to get common behaviour,
+         * even though there is no clear evidence we would need both on kbl/bxt.
+         * This area has been a source of system hangs so we play it safe
+         * and mimic the skl regardless of what bspec says.
+         *
+         * Use Force Non-Coherent whenever executing a 3D context. This
+         * is a workaround for a possible hang in the unlikely event
+         * a TLB invalidation occurs during a PSD flush.
+         */
+
+        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
+        WA_SET_BIT_MASKED(HDC_CHICKEN0,
+                          HDC_FORCE_NON_COHERENT);
+
+        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
+        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
+                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+                                  GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
+        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+
+        /*
+         * Supporting preemption with fine-granularity requires changes in the
+         * batch buffer programming. Since we can't break old userspace, we
+         * need to set our default preemption level to a safe value. Userspace
+         * is still able to use more fine-grained preemption levels, since in
+         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
+         * not real HW workarounds, but merely a way to start using preemption
+         * while maintaining the old contract with userspace.
+         */
+
+        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
+        if (IS_GEN9_LP(i915))
+                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
+}
+
+static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+        u8 vals[3] = { 0, 0, 0 };
+        unsigned int i;
+
+        for (i = 0; i < 3; i++) {
+                u8 ss;
+
+                /*
+                 * Only consider slices where one, and only one, subslice has 7
+                 * EUs
+                 */
+                if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
+                        continue;
+
+                /*
+                 * subslice_7eu[i] != 0 (because of the check above) and
+                 * ss_max == 4 (maximum number of subslices possible per slice)
+                 *
+                 * -> 0 <= ss <= 3;
+                 */
+                ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
+                vals[i] = 3 - ss;
+        }
+
+        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
+                return;
+
+        /* Tune IZ hashing. See intel_device_info_runtime_init() */
+        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+                            GEN9_IZ_HASHING_MASK(2) |
+                            GEN9_IZ_HASHING_MASK(1) |
+                            GEN9_IZ_HASHING_MASK(0),
+                            GEN9_IZ_HASHING(2, vals[2]) |
+                            GEN9_IZ_HASHING(1, vals[1]) |
+                            GEN9_IZ_HASHING(0, vals[0]));
+}
+
+static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        gen9_ctx_workarounds_init(engine);
+        skl_tune_iz_hashing(engine);
+}
+
+static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        gen9_ctx_workarounds_init(engine);
+
+        /* WaDisableThreadStallDopClockGating:bxt */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+                          STALL_DOP_GATING_DISABLE);
+
+        /* WaToEnableHwFixForPushConstHWBug:bxt */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+}
+
+static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        gen9_ctx_workarounds_init(engine);
+
+        /* WaToEnableHwFixForPushConstHWBug:kbl */
+        if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
+                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        /* WaDisableSbeCacheDispatchPortSharing:kbl */
+        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+}
+
+static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        gen9_ctx_workarounds_init(engine);
+
+        /* WaToEnableHwFixForPushConstHWBug:glk */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+}
+
+static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        gen9_ctx_workarounds_init(engine);
+
+        /* WaToEnableHwFixForPushConstHWBug:cfl */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        /* WaDisableSbeCacheDispatchPortSharing:cfl */
+        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+}
+
+static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        /* WaForceContextSaveRestoreNonCoherent:cnl */
+        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
+                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
+
+        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
+        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
+                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
+
+        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
+        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
+        if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
+                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
+
+        /* WaPushConstantDereferenceHoldDisable:cnl */
+        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
+
+        /* FtrEnableFastAnisoL1BankingFix:cnl */
+        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+
+        /* WaDisable3DMidCmdPreemption:cnl */
+        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+        /* WaDisableGPGPUMidCmdPreemption:cnl */
+        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+        /* WaDisableEarlyEOT:cnl */
+        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
+}
+
+static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        /* Wa_1604370585:icl (pre-prod)
+         * Formerly known as WaPushConstantDereferenceHoldDisable
+         */
+        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
+                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+                                  PUSH_CONSTANT_DEREF_DISABLE);
+
+        /* WaForceEnableNonCoherent:icl
+         * This is not the same workaround as in early Gen9 platforms, where
+         * lacking this could cause system hangs, but coherency performance
+         * overhead is high and only a few compute workloads really need it
+         * (the register is whitelisted in hardware now, so UMDs can opt in
+         * for coherency if they have a good reason).
+         */
+        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
+
+        /* Wa_2006611047:icl (pre-prod)
+         * Formerly known as WaDisableImprovedTdlClkGating
+         */
+        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
+                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
+
+        /* WaEnableStateCacheRedirectToCS:icl */
+        WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
+                          GEN11_STATE_CACHE_REDIRECT_TO_CS);
+
+        /* Wa_2006665173:icl (pre-prod) */
+        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
+                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
+                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
+
+        /* WaEnableFloatBlendOptimization:icl */
+        wa_write_masked_or(wal,
+                           GEN10_CACHE_MODE_SS,
+                           0, /* write-only, so skip validation */
+                           _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
+
+        /* WaDisableGPGPUMidThreadPreemption:icl */
+        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
+}
+
+void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+        wa_init_start(wal, "context");
+
+        if (IS_GEN(i915, 11))
+                icl_ctx_workarounds_init(engine);
+        else if (IS_CANNONLAKE(i915))
+                cnl_ctx_workarounds_init(engine);
+        else if (IS_COFFEELAKE(i915))
+                cfl_ctx_workarounds_init(engine);
+        else if (IS_GEMINILAKE(i915))
+                glk_ctx_workarounds_init(engine);
+        else if (IS_KABYLAKE(i915))
+                kbl_ctx_workarounds_init(engine);
+        else if (IS_BROXTON(i915))
+                bxt_ctx_workarounds_init(engine);
+        else if (IS_SKYLAKE(i915))
+                skl_ctx_workarounds_init(engine);
+        else if (IS_CHERRYVIEW(i915))
+                chv_ctx_workarounds_init(engine);
+        else if (IS_BROADWELL(i915))
+                bdw_ctx_workarounds_init(engine);
+        else if (INTEL_GEN(i915) < 8)
+                return;
+        else
+                MISSING_CASE(INTEL_GEN(i915));
+
+        wa_init_finish(wal);
+}
+
+int intel_engine_emit_ctx_wa(struct i915_request *rq)
+{
+        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
+        struct i915_wa *wa;
+        unsigned int i;
+        u32 *cs;
+        int ret;
+
+        if (wal->count == 0)
+                return 0;
+
+        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+        if (ret)
+                return ret;
+
+        cs = intel_ring_begin(rq, (wal->count * 2 + 2));
+        if (IS_ERR(cs))
+                return PTR_ERR(cs);
+
+        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
+        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+                *cs++ = i915_mmio_reg_offset(wa->reg);
+                *cs++ = wa->val;
+        }
+        *cs++ = MI_NOOP;
+
+        intel_ring_advance(rq, cs);
+
+        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+        if (ret)
+                return ret;
+
+        return 0;
+}
+
+static void
+gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        /* WaDisableKillLogic:bxt,skl,kbl */
+        if (!IS_COFFEELAKE(i915))
+                wa_write_or(wal,
+                            GAM_ECOCHK,
+                            ECOCHK_DIS_TLB);
+
+        if (HAS_LLC(i915)) {
+                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+                 *
+                 * Must match Display Engine. See
+                 * WaCompressedResourceDisplayNewHashMode.
+                 */
+                wa_write_or(wal,
+                            MMCD_MISC_CTRL,
+                            MMCD_PCLA | MMCD_HOTSPOT_EN);
+        }
+
+        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
+        wa_write_or(wal,
+                    GAM_ECOCHK,
+                    BDW_DISABLE_HDC_INVALIDATION);
+}
+
+static void
+skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        gen9_gt_workarounds_init(i915, wal);
+
+        /* WaDisableGafsUnitClkGating:skl */
+        wa_write_or(wal,
+                    GEN7_UCGCTL4,
+                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+        /* WaInPlaceDecompressionHang:skl */
+        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
+                wa_write_or(wal,
+                            GEN9_GAMT_ECO_REG_RW_IA,
+                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        gen9_gt_workarounds_init(i915, wal);
+
+        /* WaInPlaceDecompressionHang:bxt */
+        wa_write_or(wal,
+                    GEN9_GAMT_ECO_REG_RW_IA,
+                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        gen9_gt_workarounds_init(i915, wal);
+
+        /* WaDisableDynamicCreditSharing:kbl */
+        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
+                wa_write_or(wal,
+                            GAMT_CHKN_BIT_REG,
+                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+        /* WaDisableGafsUnitClkGating:kbl */
+        wa_write_or(wal,
+                    GEN7_UCGCTL4,
+                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+        /* WaInPlaceDecompressionHang:kbl */
+        wa_write_or(wal,
+                    GEN9_GAMT_ECO_REG_RW_IA,
+                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        gen9_gt_workarounds_init(i915, wal);
+}
+
+static void
+cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        gen9_gt_workarounds_init(i915, wal);
+
+        /* WaDisableGafsUnitClkGating:cfl */
+        wa_write_or(wal,
+                    GEN7_UCGCTL4,
+                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+        /* WaInPlaceDecompressionHang:cfl */
+        wa_write_or(wal,
+                    GEN9_GAMT_ECO_REG_RW_IA,
+                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+        u32 mcr_slice_subslice_mask;
+
+        /*
+         * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
+         * L3Banks could be fused off in a single-slice scenario. If that is
+         * the case, we might need to program MCR select to a valid L3Bank
+         * by default, to make sure we correctly read certain registers
+         * later on (in the range 0xB100 - 0xB3FF).
+         * This might be incompatible with
+         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
+         * Fortunately, this should not happen in production hardware, so
+         * we only assert that this is the case (instead of implementing
+         * something more complex that requires checking the range of every
+         * MMIO read).
+         */
+        if (INTEL_GEN(i915) >= 10 &&
+            is_power_of_2(sseu->slice_mask)) {
+                /*
+                 * read FUSE3 for enabled L3 Bank IDs; if an L3 Bank matches
+                 * an enabled subslice, there is no need to redirect the MCR
+                 * packet
+                 */
+                u32 slice = fls(sseu->slice_mask);
+                u32 fuse3 =
+                        intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
+                u8 ss_mask = sseu->subslice_mask[slice];
+
+                u8 enabled_mask = (ss_mask | ss_mask >>
+                                   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
+                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
+
+                /*
+                 * Production silicon should have matched L3Bank and
+                 * subslice enabled
+                 */
+                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
+        }
+
+        if (INTEL_GEN(i915) >= 11)
+                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
+                                          GEN11_MCR_SUBSLICE_MASK;
+        else
+                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
+                                          GEN8_MCR_SUBSLICE_MASK;
+        /*
+         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
+         * Before any MMIO read into slice/subslice specific registers, MCR
+         * packet control register needs to be programmed to point to any
+         * enabled s/ss pair. Otherwise, incorrect values will be returned.
+         * This means each subsequent MMIO read will be forwarded to a
+         * specific s/ss combination, but this is OK since these registers
+         * are consistent across s/ss in almost all cases. On the rare
+         * occasions, such as INSTDONE, where this value is dependent
+         * on s/ss combo, the read should be done with read_subslice_reg.
+         */
+        wa_write_masked_or(wal,
+                           GEN8_MCR_SELECTOR,
+                           mcr_slice_subslice_mask,
+                           intel_calculate_mcr_s_ss_select(i915));
+}
+
+static void
+cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        wa_init_mcr(i915, wal);
+
+        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
+        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
+                wa_write_or(wal,
+                            GAMT_CHKN_BIT_REG,
+                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
+
+        /* WaInPlaceDecompressionHang:cnl */
+        wa_write_or(wal,
+                    GEN9_GAMT_ECO_REG_RW_IA,
+                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void
+icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        wa_init_mcr(i915, wal);
+
+        /* WaInPlaceDecompressionHang:icl */
+        wa_write_or(wal,
+                    GEN9_GAMT_ECO_REG_RW_IA,
+                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+        /* WaModifyGamTlbPartitioning:icl */
+        wa_write_masked_or(wal,
+                           GEN11_GACB_PERF_CTRL,
+                           GEN11_HASH_CTRL_MASK,
+                           GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
+
+        /* Wa_1405766107:icl
+         * Formerly known as WaCL2SFHalfMaxAlloc
+         */
+        wa_write_or(wal,
+                    GEN11_LSN_UNSLCVC,
+                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
+                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
+
+        /* Wa_220166154:icl
+         * Formerly known as WaDisCtxReload
+         */
+        wa_write_or(wal,
+                    GEN8_GAMW_ECO_DEV_RW_IA,
+                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
+
+        /* Wa_1405779004:icl (pre-prod) */
+        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
+                wa_write_or(wal,
+                            SLICE_UNIT_LEVEL_CLKGATE,
+                            MSCUNIT_CLKGATE_DIS);
+
+        /* Wa_1406680159:icl */
+        wa_write_or(wal,
+                    SUBSLICE_UNIT_LEVEL_CLKGATE,
+                    GWUNIT_CLKGATE_DIS);
+
+        /* Wa_1406838659:icl (pre-prod) */
+        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
+                wa_write_or(wal,
+                            INF_UNIT_LEVEL_CLKGATE,
+                            CGPSF_CLKGATE_DIS);
+
+        /* Wa_1406463099:icl
+         * Formerly known as WaGamTlbPendError
+         */
+        wa_write_or(wal,
+                    GAMT_CHKN_BIT_REG,
+                    GAMT_CHKN_DISABLE_L3_COH_PIPE);
+}
+
+static void
+gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+        if (IS_GEN(i915, 11))
+                icl_gt_workarounds_init(i915, wal);
+        else if (IS_CANNONLAKE(i915))
+                cnl_gt_workarounds_init(i915, wal);
+        else if (IS_COFFEELAKE(i915))
+                cfl_gt_workarounds_init(i915, wal);
+        else if (IS_GEMINILAKE(i915))
+                glk_gt_workarounds_init(i915, wal);
+        else if (IS_KABYLAKE(i915))
+                kbl_gt_workarounds_init(i915, wal);
+        else if (IS_BROXTON(i915))
+                bxt_gt_workarounds_init(i915, wal);
+        else if (IS_SKYLAKE(i915))
+                skl_gt_workarounds_init(i915, wal);
+        else if (INTEL_GEN(i915) <= 8)
+                return;
+        else
+                MISSING_CASE(INTEL_GEN(i915));
+}
+
+void intel_gt_init_workarounds(struct drm_i915_private *i915)
+{
+        struct i915_wa_list *wal = &i915->gt_wa_list;
+
+        wa_init_start(wal, "GT");
+        gt_init_workarounds(i915, wal);
+        wa_init_finish(wal);
+}
+
+static enum forcewake_domains
+wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+{
+        enum forcewake_domains fw = 0;
+        struct i915_wa *wa;
+        unsigned int i;
+
+        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+                fw |= intel_uncore_forcewake_for_reg(uncore,
+                                                     wa->reg,
+                                                     FW_REG_READ |
+                                                     FW_REG_WRITE);
+
+        return fw;
+}
+
+static bool
+wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
+{
+        if ((cur ^ wa->val) & wa->read) {
+                DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
+                          name, from, i915_mmio_reg_offset(wa->reg),
+                          cur, cur & wa->read,
+                          wa->val, wa->mask);
+
+                return false;
+        }
+
+        return true;
+}
+
+static void
+wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
+{
+        enum forcewake_domains fw;
+        unsigned long flags;
+        struct i915_wa *wa;
+        unsigned int i;
+
+        if (!wal->count)
+                return;
+
+        fw = wal_get_fw_for_rmw(uncore, wal);
+
+        spin_lock_irqsave(&uncore->lock, flags);
+        intel_uncore_forcewake_get__locked(uncore, fw);
+
+        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+                intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
+                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+                        wa_verify(wa,
+                                  intel_uncore_read_fw(uncore, wa->reg),
+                                  wal->name, "application");
+        }
+
+        intel_uncore_forcewake_put__locked(uncore, fw);
+        spin_unlock_irqrestore(&uncore->lock, flags);
+}
+
+void intel_gt_apply_workarounds(struct drm_i915_private *i915)
+{
+        wa_list_apply(&i915->uncore, &i915->gt_wa_list);
+}
+
+static bool wa_list_verify(struct intel_uncore *uncore,
+                           const struct i915_wa_list *wal,
+                           const char *from)
+{
+        struct i915_wa *wa;
+        unsigned int i;
+        bool ok = true;
+
+        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+                ok &= wa_verify(wa,
+                                intel_uncore_read(uncore, wa->reg),
+                                wal->name, from);
+
+        return ok;
+}
+
+bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
+                                 const char *from)
+{
+        return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
+}
+
+static void
+whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
+{
+        struct i915_wa wa = {
+                .reg = reg
+        };
+
+        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
+                return;
+
+        _wa_add(wal, &wa);
+}
+
+static void gen9_whitelist_build(struct i915_wa_list *w)
+{
+        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
+        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
+
+        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+        whitelist_reg(w, GEN8_CS_CHICKEN1);
+
+        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
+        whitelist_reg(w, GEN8_HDC_CHICKEN1);
+}
+
+static void skl_whitelist_build(struct i915_wa_list *w)
+{
+        gen9_whitelist_build(w);
+
+        /* WaDisableLSQCROPERFforOCL:skl */
+        whitelist_reg(w, GEN8_L3SQCREG4);
+}
+
+static void bxt_whitelist_build(struct i915_wa_list *w)
+{
+        gen9_whitelist_build(w);
+}
+
+static void kbl_whitelist_build(struct i915_wa_list *w)
+{
+        gen9_whitelist_build(w);
+
+        /* WaDisableLSQCROPERFforOCL:kbl */
+        whitelist_reg(w, GEN8_L3SQCREG4);
+}
+
+static void glk_whitelist_build(struct i915_wa_list *w)
+{
+        gen9_whitelist_build(w);
+
+        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+}
+
+static void cfl_whitelist_build(struct i915_wa_list *w)
+{
+        gen9_whitelist_build(w);
+}
+
+static void cnl_whitelist_build(struct i915_wa_list *w)
+{
+        /* WaEnablePreemptionGranularityControlByUMD:cnl */
+        whitelist_reg(w, GEN8_CS_CHICKEN1);
+}
+
+static void icl_whitelist_build(struct i915_wa_list *w)
+{
+        /* WaAllowUMDToModifyHalfSliceChicken7:icl */
+        whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+
+        /* WaAllowUMDToModifySamplerMode:icl */
+        whitelist_reg(w, GEN10_SAMPLER_MODE);
+}
+
+void intel_engine_init_whitelist(struct intel_engine_cs *engine)
+{
+        struct drm_i915_private *i915 = engine->i915;
+        struct i915_wa_list *w = &engine->whitelist;
+
+        GEM_BUG_ON(engine->id != RCS0);
+
+        wa_init_start(w, "whitelist");
+
+        if (IS_GEN(i915, 11))
+                icl_whitelist_build(w);
+        else if (IS_CANNONLAKE(i915))
+                cnl_whitelist_build(w);
+        else if (IS_COFFEELAKE(i915))
+                cfl_whitelist_build(w);
+        else if (IS_GEMINILAKE(i915))
+                glk_whitelist_build(w);
+        else if (IS_KABYLAKE(i915))
+                kbl_whitelist_build(w);
+        else if (IS_BROXTON(i915))
+                bxt_whitelist_build(w);
+        else if (IS_SKYLAKE(i915))
+                skl_whitelist_build(w);
+        else if (INTEL_GEN(i915) <= 8)
+                return;
+        else
+                MISSING_CASE(INTEL_GEN(i915));
+
+        wa_init_finish(w);
+}
+
+void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
+{
+        const struct i915_wa_list *wal = &engine->whitelist;
+        struct intel_uncore *uncore = engine->uncore;
+        const u32 base = engine->mmio_base;
+        struct i915_wa *wa;
+        unsigned int i;
+
+        if (!wal->count)
+                return;
+
+        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+                intel_uncore_write(uncore,
+                                   RING_FORCE_TO_NONPRIV(base, i),
+                                   i915_mmio_reg_offset(wa->reg));
+
+        /* And clear the rest just in case of garbage */
+        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
+                intel_uncore_write(uncore,
+                                   RING_FORCE_TO_NONPRIV(base, i),
+                                   i915_mmio_reg_offset(RING_NOPID(base)));
+}
+
+static void
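The diff continues beyond this excerpt. Two short standalone sketches of the infrastructure it introduces may help. First, _wa_add()'s merge rule: a second workaround added for a register already in the (sorted) list does not get its own entry; its value and mask are OR-ed into the existing one, and if the new mask only touches bits the old entry had already claimed, the old value for those bits is dropped first (the "Discarding overwritten w/a" path). A condensed model of just that rule, with illustrative struct and function names rather than the kernel's:

#include <stdint.h>
#include <stdio.h>

struct wa { uint32_t reg, mask, val; };

static void wa_merge(struct wa *old, const struct wa *new_wa)
{
        /* New mask entirely inside the old one: those bits are being
         * overwritten, so drop the old value for them first. */
        if ((new_wa->mask & ~old->mask) == 0)
                old->val &= ~new_wa->mask;

        old->val |= new_wa->val;
        old->mask |= new_wa->mask;
}

int main(void)
{
        struct wa a = { .reg = 0xe4f0, .mask = 0x000f, .val = 0x0005 };
        struct wa b = { .reg = 0xe4f0, .mask = 0x0003, .val = 0x0002 };

        wa_merge(&a, &b);
        /* bits 1:0 now come from b, bits 3:2 survive from a */
        printf("mask=%#06x val=%#06x\n", a.mask, a.val); /* 0x000f / 0x0006 */
        return 0;
}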
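Second, the shape of the command stream intel_engine_emit_ctx_wa() builds: one MI_LOAD_REGISTER_IMM header, an (offset, value) pair per workaround, and an MI_NOOP pad, which is why the listing reserves wal->count * 2 + 2 dwords. A sketch of that packing; mi_lri_header() stands in for the driver's MI_LOAD_REGISTER_IMM() macro and follows the usual GEN encoding of opcode 0x22 with a 2*count - 1 length field, so treat the exact header encoding as an assumption rather than a reference:

#include <stddef.h>
#include <stdint.h>

struct wa_entry { uint32_t offset, value; };

/* Assumed LRI encoding: opcode 0x22 in bits 28:23, dword length 2*count - 1. */
static uint32_t mi_lri_header(unsigned int count)
{
        return (0x22u << 23) | (2 * count - 1);
}

/* Returns the number of dwords written: count * 2 + 2. */
static size_t pack_ctx_wa(uint32_t *cs, const struct wa_entry *list,
                          unsigned int count)
{
        size_t n = 0;
        unsigned int i;

        cs[n++] = mi_lri_header(count);
        for (i = 0; i < count; i++) {
                cs[n++] = list[i].offset;       /* register to load */
                cs[n++] = list[i].value;        /* value the context image gets */
        }
        cs[n++] = 0;    /* MI_NOOP keeps the dword count even */
        return n;
}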