/*
* Copyright 2008 Jerome Glisse.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Jerome Glisse <glisse@freedesktop.org>
*/
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
uint32_t *offset)
{
struct drm_gem_object *gobj;
unsigned long size;
gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
return -EINVAL;
p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
p->uf_entry.tv.shared = true;
p->uf_entry.user_pages = NULL;
size = amdgpu_bo_size(p->uf_entry.robj);
if (size != PAGE_SIZE || (data->offset + 8) > size)
return -EINVAL;
*offset = data->offset;
drm_gem_object_put_unlocked(gobj);
if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
amdgpu_bo_unref(&p->uf_entry.robj);
return -EINVAL;
}
return 0;
}
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
int i;
int ret;
if (cs->in.num_chunks == 0)
return 0;
chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
if (!chunk_array)
return -ENOMEM;
p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
if (!p->ctx) {
ret = -EINVAL;
goto free_chunk;
}
/* get chunks */
chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) {
ret = -EFAULT;
goto put_ctx;
}
p->nchunks = cs->in.num_chunks;
p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
GFP_KERNEL);
if (!p->chunks) {
ret = -ENOMEM;
goto put_ctx;
}
for (i = 0; i < p->nchunks; i++) {
struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
uint32_t __user *cdata;
chunk_ptr = u64_to_user_ptr(chunk_array[i]);
if (copy_from_user(&user_chunk, chunk_ptr,
sizeof(struct drm_amdgpu_cs_chunk))) {
ret = -EFAULT;
i--;
goto free_partial_kdata;
}
p->chunks[i].chunk_id = user_chunk.chunk_id;
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
cdata = u64_to_user_ptr(user_chunk.chunk_data);
p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
if (p->chunks[i].kdata == NULL) {
ret = -ENOMEM;
i--;
goto free_partial_kdata;
}
size *= sizeof(uint32_t);
if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
ret = -EFAULT;
goto free_partial_kdata;
}
switch (p->chunks[i].chunk_id) {
case AMDGPU_CHUNK_ID_IB:
++num_ibs;
break;
case AMDGPU_CHUNK_ID_FENCE:
size = sizeof(struct drm_amdgpu_cs_chunk_fence);
if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
ret = -EINVAL;
goto free_partial_kdata;
}
ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
&uf_offset);
if (ret)
goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
break;
default:
ret = -EINVAL;
goto free_partial_kdata;
}
}
ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
if (ret)
goto free_all_kdata;
if (p->uf_entry.robj)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
return 0;
free_all_kdata:
i = p->nchunks - 1;
free_partial_kdata:
for (; i >= 0; i--)
kvfree(p->chunks[i].kdata);
kfree(p->chunks);
p->chunks = NULL;
p->nchunks = 0;
put_ctx:
amdgpu_ctx_put(p->ctx);
free_chunk:
kfree(chunk_array);
return ret;
}
/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
if (us <= 0 || !adev->mm_stats.log2_max_MBps)
return 0;
/* Since accum_us is incremented by a million per second, just
* multiply it by the number of MB/s to get the number of bytes.
*/
return us << adev->mm_stats.log2_max_MBps;
}
static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
{
if (!adev->mm_stats.log2_max_MBps)
return 0;
return bytes >> adev->mm_stats.log2_max_MBps;
}
/* Returns how many bytes TTM can move right now. If no bytes can be moved,
* it returns 0. If it returns non-zero, it's OK to move at least one buffer,
* which means it can go over the threshold once. If that happens, the driver
* will be in debt and no other buffer migrations can be done until that debt
* is repaid.
*
* This approach allows moving a buffer of any size (it's important to allow
* that).
*
* The currency is simply time in microseconds and it increases as the clock
* ticks. The accumulated microseconds (us) are converted to bytes and
* returned.
*/
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
u64 *max_bytes,
u64 *max_vis_bytes)
{
s64 time_us, increment_us;
u64 free_vram, total_vram, used_vram;
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
* throttling.
*
* It means that in order to get full max MBps, at least 5 IBs per
* second must be submitted and not more than 200ms apart from each
* other.
*/
const s64 us_upper_bound = 200000;
if (!adev->mm_stats.log2_max_MBps) {
*max_bytes = 0;
*max_vis_bytes = 0;
return;
}
total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
spin_lock(&adev->mm_stats.lock);
/* Increase the amount of accumulated us. */
time_us = ktime_to_us(ktime_get());
increment_us = time_us - adev->mm_stats.last_update_us;
adev->mm_stats.la
|