diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 11:07:56 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 11:07:56 -0800 |
| commit | 1d36dffa5d887715dacca0f717f4519b7be5e498 (patch) | |
| tree | a68f7c00dbb3036a67806ed6c6b8cc61c3cff60d /drivers/gpu/drm/i915/gt/selftest_timeline.c | |
| parent | 2c85ebc57b3e1817b6ce1a6b703928e113a90442 (diff) | |
| parent | b10733527bfd864605c33ab2e9a886eec317ec39 (diff) | |
| download | linux-1d36dffa5d887715dacca0f717f4519b7be5e498.tar.gz linux-1d36dffa5d887715dacca0f717f4519b7be5e498.tar.bz2 linux-1d36dffa5d887715dacca0f717f4519b7be5e498.zip | |
Merge tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Not a huge amount of big things here, AMD has support for a few new HW
variants (vangogh, green sardine, dimgrey cavefish), Intel has some
more DG1 enablement. We have a few big reworks of the TTM layers and
interfaces, GEM and atomic internal API reworks cross tree. fbdev is
marked orphaned in here as well to reflect the current reality.
core:
- documentation updates
- deprecate DRM_FORMAT_MOD_NONE
- atomic crtc enable/disable rework
- GEM convert drivers to gem object functions
- remove SCATTER_LIST_MAX_SEGMENT
sched:
- avoid infinite waits
ttm:
- remove AGP support
- don't modify caching for swapout
- ttm pinning rework
- major TTM reworks
- new backend allocator
- multihop support
vram-helper:
- top down BO placement fix
- TTM changes
- GEM object support
displayport:
- DP 2.0 DPCD prep work
- DP MST extended DPCD caps
fbdev:
- mark as orphaned
amdgpu:
- Initial Vangogh support
- Green Sardine support
- Dimgrey Cavefish support
- SG display support for renoir
- SMU7 improvements
- gfx9+ modiifier support
- CI BACO fixes
radeon:
- expose voltage via hwmon on SUMO
amdkfd:
- fix unique id handling
i915:
- more DG1 enablement
- bigjoiner support
- integer scaling filter support
- async flip support
- ICL+ DSI command mode
- Improve display shutdown
- Display refactoring
- eLLC machine fbdev loading fix
- dma scatterlist fixes
- TGL hang fixes
- eLLC display buffer caching on SKL+
- MOCS PTE seeting for gen9+
msm:
- Shutdown hook
- GPU cooling device support
- DSI 7nm and 10nm phy/pll updates
- sm8150/sm2850 DPU support
- GEM locking re-work
- LLCC system cache support
aspeed:
- sysfs output config support
ast:
- LUT fix
- new display mode
gma500:
- remove 2d framebuffer accel
panfrost:
- move gpu reset to a worker
exynos:
- new HDMI mode support
mediatek:
- MT8167 support
- yaml bindings
- MIPI DSI phy code moved
etnaviv:
- new perf counter
- more lockdep annotation
hibmc:
- i2c DDC support
ingenic:
- pixel clock reset fix
- reserved memory support
- allow both DMA channels at once
- different pixel format support
- 30/24/8-bit palette modes
tilcdc:
- don't keep vblank irq enabled
vc4:
- new maintainer added
- DSI registration fix
virtio:
- blob resource support
- host visible and cross-device support
- uuid api support"
* tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm: (1754 commits)
drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs
drm/amdgpu: fix size calculation with stolen vga memory
drm/amdgpu: remove amdgpu_ttm_late_init and amdgpu_bo_late_init
drm/amdgpu: free the pre-OS console framebuffer after the first modeset
drm/amdgpu: enable runtime pm using BACO on CI dGPUs
drm/amdgpu/cik: enable BACO reset on Bonaire
drm/amd/pm: update smu10.h WORKLOAD_PPLIB setting for raven
drm/amd/pm: remove one unsupported smu function for vangogh
drm/amd/display: setup system context for APUs
drm/amd/display: add S/G support for Vangogh
drm/amdkfd: Fix leak in dmabuf import
drm/amdgpu: use AMDGPU_NUM_VMID when possible
drm/amdgpu: fix sdma instance fw version and feature version init
drm/amd/pm: update driver if version for dimgrey_cavefish
drm/amd/display: 3.2.115
drm/amd/display: [FW Promotion] Release 0.0.45
drm/amd/display: Revert DCN2.1 dram_clock_change_latency update
drm/amd/display: Enable gpu_vm_support for dcn3.01
drm/amd/display: Fixed the audio noise during mode switching with HDCP mode on
drm/amd/display: Add wm table for Renoir
...
Diffstat (limited to 'drivers/gpu/drm/i915/gt/selftest_timeline.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gt/selftest_timeline.c | 378 |
1 files changed, 376 insertions, 2 deletions
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 19c2cb166e7c..2edf2b15885f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -17,8 +17,9 @@ #include "../selftests/i915_random.h" #include "../i915_selftest.h" -#include "../selftests/igt_flush_test.h" -#include "../selftests/mock_gem_device.h" +#include "selftests/igt_flush_test.h" +#include "selftests/lib_sw_fence.h" +#include "selftests/mock_gem_device.h" #include "selftests/mock_timeline.h" static struct page *hwsp_page(struct intel_timeline *tl) @@ -755,6 +756,378 @@ out_free: return err; } +static int emit_read_hwsp(struct i915_request *rq, + u32 seqno, u32 hwsp, + u32 *addr) +{ + const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0)); + u32 *cs; + + cs = intel_ring_begin(rq, 12); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = *addr; + *cs++ = 0; + *cs++ = seqno; + *addr += 4; + + *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = gpr; + *cs++ = hwsp; + *cs++ = 0; + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = gpr; + *cs++ = *addr; + *cs++ = 0; + *addr += 4; + + intel_ring_advance(rq, cs); + + return 0; +} + +struct hwsp_watcher { + struct i915_vma *vma; + struct i915_request *rq; + u32 addr; + u32 *map; +}; + +static bool cmp_lt(u32 a, u32 b) +{ + return a < b; +} + +static bool cmp_gte(u32 a, u32 b) +{ + return a >= b; +} + +static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(gt->i915, SZ_2M); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + w->map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(w->map)) { + i915_gem_object_put(obj); + return PTR_ERR(w->map); + } + + vma = i915_gem_object_ggtt_pin_ww(obj, NULL, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return PTR_ERR(vma); + } + + w->vma = vma; + w->addr = i915_ggtt_offset(vma); + return 0; +} + +static int create_watcher(struct hwsp_watcher *w, + struct intel_engine_cs *engine, + int ringsz) +{ + struct intel_context *ce; + struct intel_timeline *tl; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + ce->ring = __intel_context_ring_size(ringsz); + w->rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(w->rq)) + return PTR_ERR(w->rq); + + w->addr = i915_ggtt_offset(w->vma); + tl = w->rq->context->timeline; + + /* some light mutex juggling required; think co-routines */ + lockdep_unpin_lock(&tl->mutex, w->rq->cookie); + mutex_unlock(&tl->mutex); + + return 0; +} + +static int check_watcher(struct hwsp_watcher *w, const char *name, + bool (*op)(u32 hwsp, u32 seqno)) +{ + struct i915_request *rq = fetch_and_zero(&w->rq); + struct intel_timeline *tl = rq->context->timeline; + u32 offset, end; + int err; + + GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size); + + i915_request_get(rq); + mutex_lock(&tl->mutex); + rq->cookie = lockdep_pin_lock(&tl->mutex); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ) < 0) { + err = -ETIME; + goto out; + } + + err = 0; + offset = 0; + end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map); + while (offset < end) { + if (!op(w->map[offset + 1], w->map[offset])) { + pr_err("Watcher '%s' found HWSP value %x for seqno %x\n", + name, w->map[offset + 1], w->map[offset]); + err = -EINVAL; + } + + offset += 2; + } + +out: + i915_request_put(rq); + return err; +} + +static void cleanup_watcher(struct hwsp_watcher *w) +{ + if (w->rq) { + struct intel_timeline *tl = w->rq->context->timeline; + + mutex_lock(&tl->mutex); + w->rq->cookie = lockdep_pin_lock(&tl->mutex); + + i915_request_add(w->rq); + } + + i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP); +} + +static bool retire_requests(struct intel_timeline *tl) +{ + struct i915_request *rq, *rn; + + mutex_lock(&tl->mutex); + list_for_each_entry_safe(rq, rn, &tl->requests, link) + if (!i915_request_retire(rq)) + break; + mutex_unlock(&tl->mutex); + + return !i915_active_fence_isset(&tl->last_request); +} + +static struct i915_request *wrap_timeline(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + struct intel_timeline *tl = ce->timeline; + u32 seqno = rq->fence.seqno; + + while (tl->seqno >= seqno) { /* Cause a wrap */ + i915_request_put(rq); + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + } + + i915_request_put(rq); + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int live_hwsp_read(void *arg) +{ + struct intel_gt *gt = arg; + struct hwsp_watcher watcher[2] = {}; + struct intel_engine_cs *engine; + struct intel_timeline *tl; + enum intel_engine_id id; + int err = 0; + int i; + + /* + * If we take a reference to the HWSP for reading on the GPU, that + * read may be arbitrarily delayed (either by foreign fence or + * priority saturation) and a wrap can happen within 30 minutes. + * When the GPU read is finally submitted it should be correct, + * even across multiple wraps. + */ + + if (INTEL_GEN(gt->i915) < 8) /* CS convenience [SRM/LRM] */ + return 0; + + tl = intel_timeline_create(gt); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + if (!tl->hwsp_cacheline) + goto out_free; + + for (i = 0; i < ARRAY_SIZE(watcher); i++) { + err = setup_watcher(&watcher[i], gt); + if (err) + goto out; + } + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + unsigned long count = 0; + IGT_TIMEOUT(end_time); + + /* Create a request we can use for remote reading of the HWSP */ + err = create_watcher(&watcher[1], engine, SZ_512K); + if (err) + goto out; + + do { + struct i915_sw_fence *submit; + struct i915_request *rq; + u32 hwsp; + + submit = heap_fence_create(GFP_KERNEL); + if (!submit) { + err = -ENOMEM; + goto out; + } + + err = create_watcher(&watcher[0], engine, SZ_4K); + if (err) + goto out; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + /* Skip to the end, saving 30 minutes of nops */ + tl->seqno = -10u + 2 * (count & 3); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); + ce->timeline = intel_timeline_get(tl); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + intel_context_put(ce); + goto out; + } + + err = i915_sw_fence_await_dma_fence(&rq->submit, + &watcher[0].rq->fence, 0, + GFP_KERNEL); + if (err < 0) { + i915_request_add(rq); + intel_context_put(ce); + goto out; + } + + mutex_lock(&watcher[0].rq->context->timeline->mutex); + err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp); + if (err == 0) + err = emit_read_hwsp(watcher[0].rq, /* before */ + rq->fence.seqno, hwsp, + &watcher[0].addr); + mutex_unlock(&watcher[0].rq->context->timeline->mutex); + if (err) { + i915_request_add(rq); + intel_context_put(ce); + goto out; + } + + mutex_lock(&watcher[1].rq->context->timeline->mutex); + err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp); + if (err == 0) + err = emit_read_hwsp(watcher[1].rq, /* after */ + rq->fence.seqno, hwsp, + &watcher[1].addr); + mutex_unlock(&watcher[1].rq->context->timeline->mutex); + if (err) { + i915_request_add(rq); + intel_context_put(ce); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + + rq = wrap_timeline(rq); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit, + &rq->fence, 0, + GFP_KERNEL); + if (err < 0) { + i915_request_put(rq); + goto out; + } + + err = check_watcher(&watcher[0], "before", cmp_lt); + i915_sw_fence_commit(submit); + heap_fence_put(submit); + if (err) { + i915_request_put(rq); + goto out; + } + count++; + + if (8 * watcher[1].rq->ring->emit > + 3 * watcher[1].rq->ring->size) { + i915_request_put(rq); + break; + } + + /* Flush the timeline before manually wrapping again */ + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + HZ) < 0) { + err = -ETIME; + i915_request_put(rq); + goto out; + } + + retire_requests(tl); + i915_request_put(rq); + } while (!__igt_timeout(end_time, NULL)); + WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef); + + pr_info("%s: simulated %lu wraps\n", engine->name, count); + err = check_watcher(&watcher[1], "after", cmp_gte); + if (err) + goto out; + } + +out: + for (i = 0; i < ARRAY_SIZE(watcher); i++) + cleanup_watcher(&watcher[i]); + + if (igt_flush_test(gt->i915)) + err = -EIO; + +out_free: + intel_timeline_put(tl); + return err; +} + static int live_hwsp_rollover_kernel(void *arg) { struct intel_gt *gt = arg; @@ -998,6 +1371,7 @@ int intel_timeline_live_selftests(struct drm_i915_private *i915) SUBTEST(live_hwsp_engine), SUBTEST(live_hwsp_alternate), SUBTEST(live_hwsp_wrap), + SUBTEST(live_hwsp_read), SUBTEST(live_hwsp_rollover_kernel), SUBTEST(live_hwsp_rollover_user), }; |
