Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  4171
1 file changed, 4057 insertions(+), 114 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 5f8500577c02..f5b8d3f388ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -25,35 +25,504 @@
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
+#include "soc15d.h"
 #include "soc15_common.h"
 #include "vega10_enum.h"
+#include "v9_structs.h"
+
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
 #include "gc/gc_9_4_3_offset.h"
 #include "gc/gc_9_4_3_sh_mask.h"
 #include "gfx_v9_4_3.h"
+#include "amdgpu_xcp.h"
+
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
+#define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
+
+struct amdgpu_gfx_ras gfx_v9_4_3_ras;
+
+static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev);
+static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev);
+static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev,
+				struct amdgpu_cu_info *cu_info);
+
+static void gfx_v9_4_3_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+				uint64_t queue_mask)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
+			  /* vmid_mask:0* queue_type:0 (KIQ) */
+			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
+	amdgpu_ring_write(kiq_ring,
+			lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring,
+			upper_32_bits(queue_mask));	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
+}
+
+static void gfx_v9_4_3_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+				 struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
+			 /*queue_type: normal compute queue */
+			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
+			 /* alloc format: all_on_one_pipe */
+			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
+			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+			 /* num_queues: must be 1 */
+			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v9_4_3_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   enum amdgpu_unmap_queues_action action,
+				   u64 gpu_addr, u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			  PACKET3_UNMAP_QUEUES_ACTION(action) |
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+	if (action == PREEMPT_QUEUES_NO_UNMAP) {
+		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, seq);
+	} else {
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+	}
+}
+
+static void gfx_v9_4_3_kiq_query_status(struct amdgpu_ring *kiq_ring,
+				   struct amdgpu_ring *ring,
+				   u64 addr,
+				   u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+			  PACKET3_QUERY_STATUS_COMMAND(2));
+	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v9_4_3_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+				uint16_t pasid, uint32_t flush_type,
+				bool all_hub)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(kiq_ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+}
+
+static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = {
+	.kiq_set_resources = gfx_v9_4_3_kiq_set_resources,
+	.kiq_map_queues = gfx_v9_4_3_kiq_map_queues,
+	.kiq_unmap_queues = gfx_v9_4_3_kiq_unmap_queues,
+	.kiq_query_status = gfx_v9_4_3_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx_v9_4_3_kiq_invalidate_tlbs,
+	.set_resources_size = 8,
+	.map_queues_size = 7,
+	.unmap_queues_size = 6,
+	.query_status_size = 7,
+	.invalidate_tlbs_size = 2,
+};
+
+static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+	int i, num_xcc;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (i = 0; i < num_xcc; i++)
+		adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs;
+}
+
+static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
+{
+	int i, num_xcc, dev_inst;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (i = 0; i < num_xcc; i++) {
+		dev_inst = GET_INST(GC, i);
+		if (dev_inst >= 2)
+			WREG32_SOC15(GC, dev_inst, regGRBM_MCM_ADDR, 0x4);
+
+		/* Golden settings applied by driver for ASIC with rev_id 0 */
+		if (adev->rev_id == 0) {
+			WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG,
+				     GOLDEN_GB_ADDR_CONFIG);
+
+			WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
+					      REDUCE_FIFO_DEPTH_BY_2, 2);
+		}
+	}
+}
+
+static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
+				       bool wc, uint32_t reg, uint32_t val)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
+				WRITE_DATA_DST_SEL(0) |
+				(wc ? WR_CONFIRM : 0));
+	amdgpu_ring_write(ring, reg);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, val);
+}
+
+static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+				  int mem_space, int opt, uint32_t addr0,
+				  uint32_t addr1, uint32_t ref, uint32_t mask,
+				  uint32_t inv)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	amdgpu_ring_write(ring,
+				 /* memory (1) or register (0) */
+				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
+				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
+				 WAIT_REG_MEM_ENGINE(eng_sel)));
+
+	if (mem_space)
+		BUG_ON(addr0 & 0x3); /* Dword align */
+	amdgpu_ring_write(ring, addr0);
+	amdgpu_ring_write(ring, addr1);
+	amdgpu_ring_write(ring, ref);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring)
+{
+	uint32_t scratch_reg0_offset, xcc_offset;
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	/* Use register offset which is local to XCC in the packet */
+	xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+	scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
+	WREG32(scratch_reg0_offset, 0xCAFEDEAD);
+
+	r = amdgpu_ring_alloc(ring, 3);
+	if (r)
+		return r;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(ring, xcc_offset - PACKET3_SET_UCONFIG_REG_START);
+	amdgpu_ring_write(ring, 0xDEADBEEF);
+	amdgpu_ring_commit(ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch_reg0_offset);
+		if (tmp == 0xDEADBEEF)
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+	return r;
+}
+
+static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ib ib;
+	struct dma_fence *f = NULL;
+
+	unsigned index;
+	uint64_t gpu_addr;
+	uint32_t tmp;
+	long r;
+
+	r = amdgpu_device_wb_get(adev, &index);
+	if (r)
+		return r;
+
+	gpu_addr = adev->wb.gpu_addr + (index * 4);
+	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+	memset(&ib, 0, sizeof(ib));
+	r = amdgpu_ib_get(adev, NULL, 16,
+			  AMDGPU_IB_POOL_DIRECT, &ib);
+	if (r)
+		goto err1;
+
+	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ib.ptr[2] = lower_32_bits(gpu_addr);
+	ib.ptr[3] = upper_32_bits(gpu_addr);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = 5;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r)
+		goto err2;
+
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		r = -ETIMEDOUT;
+		goto err2;
+	} else if (r < 0) {
+		goto err2;
+	}
+
+	tmp = adev->wb.wb[index];
+	if (tmp == 0xDEADBEEF)
+		r = 0;
+	else
+		r = -EINVAL;
+
+err2:
+	amdgpu_ib_free(adev, &ib, NULL);
+	dma_fence_put(f);
+err1:
+	amdgpu_device_wb_free(adev, index);
+	return r;
+}
+
+
+/* This value might differs per partition */
 static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
 	uint64_t clock;
 	amdgpu_gfx_off_ctrl(adev, false);
 	mutex_lock(&adev->gfx.gpu_clock_mutex);
-	WREG32_SOC15(GC, 0, regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
-	clock = (uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_LSB) |
-		((uint64_t)RREG32_SOC15(GC, 0, regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+	WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+	clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) |
+		((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
 	amdgpu_gfx_off_ctrl(adev, true);
 	return clock;
 }
-static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
-				    u32 se_num,
-				    u32 sh_num,
-				    u32 instance)
+static void gfx_v9_4_3_free_microcode(struct amdgpu_device *adev)
+{
+	amdgpu_ucode_release(&adev->gfx.pfp_fw);
+	amdgpu_ucode_release(&adev->gfx.me_fw);
+	amdgpu_ucode_release(&adev->gfx.ce_fw);
+	amdgpu_ucode_release(&adev->gfx.rlc_fw);
+	amdgpu_ucode_release(&adev->gfx.mec_fw);
+	amdgpu_ucode_release(&adev->gfx.mec2_fw);
+
+	kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev,
+					  const char *chip_name)
+{
+	char fw_name[30];
+	int err;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t version_major;
+	uint16_t version_minor;
+
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
+	if (err)
+		goto out;
+	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+out:
+	if (err)
+		amdgpu_ucode_release(&adev->gfx.rlc_fw);
+
+	return err;
+}
+
+static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev)
+{
+	return true;
+}
+
+static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev)
+{
+	if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev))
+		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+}
+
+static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev,
+					  const char *chip_name)
+{
+	char fw_name[30];
+	int err;
+
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
+
+	adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
+	adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
+
+	gfx_v9_4_3_check_if_need_gfxoff(adev);
+
+out:
+	if (err)
+		amdgpu_ucode_release(&adev->gfx.mec_fw);
+	return err;
+}
+
+static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev)
+{
+	const char *chip_name;
+	int r;
+
+	chip_name = "gc_9_4_3";
+
+	r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name);
+	if (r)
+		return r;
+
+	r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name);
+	if (r)
+		return r;
+
+	return r;
+}
+
+static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+}
+
+static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
+{
+	int r, i, num_xcc;
+	u32 *hpd;
+	const __le32 *fw_data;
+	unsigned fw_size;
+	u32 *fw;
+	size_t mec_hpd_size;
+
+	const struct gfx_firmware_header_v1_0 *mec_hdr;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (i = 0; i < num_xcc; i++)
+		bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
+			AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	mec_hpd_size =
+		adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_VRAM |
+					      AMDGPU_GEM_DOMAIN_GTT,
+					      &adev->gfx.mec.hpd_eop_obj,
+					      &adev->gfx.mec.hpd_eop_gpu_addr,
+					      (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
+			gfx_v9_4_3_mec_fini(adev);
+			return r;
+		}
+
+		if (amdgpu_emu_mode == 1) {
+			for (i = 0; i < mec_hpd_size / 4; i++) {
+				memset((void *)(hpd + i), 0, 4);
+				if (i % 50 == 0)
+					msleep(1);
+			}
+		} else {
+			memset(hpd, 0, mec_hpd_size);
+		}
+
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
+
+	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+
+	fw_data = (const __le32 *)
+		(adev->gfx.mec_fw->data +
+		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
+	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
+
+	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
+				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+				      &adev->gfx.mec.mec_fw_obj,
+				      &adev->gfx.mec.mec_fw_gpu_addr,
+				      (void **)&fw);
+	if (r) {
+		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
+		gfx_v9_4_3_mec_fini(adev);
+		return r;
+	}
+
+	memcpy(fw, fw_data, fw_size);
+
+	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
+	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
+
+	return 0;
+}
+
+static void gfx_v9_4_3_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+					u32 sh_num, u32 instance, int xcc_id)
 {
 	u32 data;
@@ -76,24 +545,24 @@ static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
 	else
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
-	WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data);
+	WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data);
 }
-static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t address)
 {
-	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
+	WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
 		(SQ_IND_INDEX__FORCE_READ_MASK));
-	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+	return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
 }
-static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
 			   uint32_t wave, uint32_t thread,
 			   uint32_t regno, uint32_t num, uint32_t *out)
 {
-	WREG32_SOC15_RLC(GC, 0, regSQ_IND_INDEX,
+	WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
@@ -101,53 +570,487 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
 		(SQ_IND_INDEX__FORCE_READ_MASK) |
 		(SQ_IND_INDEX__AUTO_INCR_MASK));
 	while (num--)
-		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+		*(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
 }
 static void gfx_v9_4_3_read_wave_data(struct amdgpu_device *adev,
-				     uint32_t simd, uint32_t wave,
+				     uint32_t xcc_id, uint32_t simd, uint32_t wave,
 				     uint32_t *dst, int *no_fields)
 {
 	/* type 1 wave data */
 	dst[(*no_fields)++] = 1;
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
-	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
-}
-
-static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_HW_ID);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_GPR_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_LDS_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_TRAPSTS);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_DBG0);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_M0);
+	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_MODE);
+}
+
+static void gfx_v9_4_3_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
 				     uint32_t wave, uint32_t start,
 				     uint32_t size, uint32_t *dst)
 {
-	wave_read_regs(adev, simd, wave, 0,
+	wave_read_regs(adev, xcc_id, simd, wave, 0,
 		       start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }
-static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
+static void gfx_v9_4_3_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
 				     uint32_t wave, uint32_t thread,
 				     uint32_t start, uint32_t size, uint32_t *dst)
 {
-	wave_read_regs(adev, simd, wave, thread,
+	wave_read_regs(adev, xcc_id, simd, wave, thread,
 		       start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
 }
 static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
-					u32 me, u32 pipe, u32 q, u32 vm)
+					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
 {
-	soc15_grbm_select(adev, me, pipe, q, vm);
+	soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
+}
+
+
+static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
+						int num_xccs_per_xcp)
+{
+	int i, num_xcc;
+	u32 tmp = 0;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	for (i = 0; i < num_xcc; i++) {
+		tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP,
+				    num_xccs_per_xcp);
+		tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID,
+				    i % num_xccs_per_xcp);
+		WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, tmp);
+	}
+
+	adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp;
+
+	return 0;
+}
+
+static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
+{
+	int xcc;
+
+	xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0));
+	if (!xcc) {
+		dev_err(adev->dev, "Couldn't find xcc mapping from IH node");
+		return -EINVAL;
+	}
+
+	return xcc - 1;
+}
+
+static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
+	.get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter,
+	.select_se_sh = &gfx_v9_4_3_xcc_select_se_sh,
+	.read_wave_data = &gfx_v9_4_3_read_wave_data,
+	.read_wave_sgprs = &gfx_v9_4_3_read_wave_sgprs,
+	.read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs,
+	.select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
+	.switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
+	.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+};
+
+static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
+{
+	u32 gb_addr_config;
+
+	adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs;
+	adev->gfx.ras = &gfx_v9_4_3_ras;
+
+	switch (adev->ip_versions[GC_HWIP][0]) {
+	case IP_VERSION(9, 4, 3):
+		adev->gfx.config.max_hw_contexts = 8;
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	adev->gfx.config.gb_addr_config = gb_addr_config;
+
+	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
+			REG_GET_FIELD(
+					adev->gfx.config.gb_addr_config,
+					GB_ADDR_CONFIG,
+					NUM_PIPES);
+
+	adev->gfx.config.max_tile_pipes =
+		adev->gfx.config.gb_addr_config_fields.num_pipes;
+
+	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
+			REG_GET_FIELD(
+					adev->gfx.config.gb_addr_config,
+					GB_ADDR_CONFIG,
+					NUM_BANKS);
+	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
+			REG_GET_FIELD(
+					adev->gfx.config.gb_addr_config,
+					GB_ADDR_CONFIG,
+					MAX_COMPRESSED_FRAGS);
+	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
+			REG_GET_FIELD(
+					adev->gfx.config.gb_addr_config,
+					GB_ADDR_CONFIG,
+					NUM_RB_PER_SE);
+	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
+			REG_GET_FIELD(
+					adev->gfx.config.gb_addr_config,
+					GB_ADDR_CONFIG,
+					NUM_SHADER_ENGINES);
+	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
+			REG_GET_FIELD(
+					adev->gfx.config.gb_addr_config,
+					GB_ADDR_CONFIG,
+					PIPE_INTERLEAVE_SIZE));
+
+	return 0;
+}
+
+static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				        int xcc_id, int mec, int pipe, int queue)
+{
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+	unsigned int hw_prio;
+	uint32_t xcc_doorbell_start;
+
+	ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
+				       ring_id];
+
+	/* mec0 is me1 */
+	ring->xcc_id = xcc_id;
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
+			     xcc_id * adev->doorbell_index.xcc_doorbell_range;
+	ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+			     (ring_id + xcc_id * adev->gfx.num_compute_rings) *
+			     GFX9_MEC_HPD_SIZE;
+	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
+	sprintf(ring->name, "comp_%d.%d.%d.%d",
+			ring->xcc_id, ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+				hw_prio, NULL);
+}
+
+static int gfx_v9_4_3_sw_init(void *handle)
+{
+	int i, j, k, r, ring_id, xcc_id, num_xcc;
+	struct amdgpu_kiq *kiq;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->gfx.mec.num_mec = 2;
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	/* EOP Event */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
+	if (r)
+		return r;
+
+	/* Privileged reg */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
+			      &adev->gfx.priv_reg_irq);
+	if (r)
+		return r;
+
+	/* Privileged inst */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
+			      &adev->gfx.priv_inst_irq);
+	if (r)
+		return r;
+
+	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+	r = adev->gfx.rlc.funcs->init(adev);
+	if (r) {
+		DRM_ERROR("Failed to init rlc BOs!\n");
+		return r;
+	}
+
+	r = gfx_v9_4_3_mec_init(adev);
+	if (r) {
+		DRM_ERROR("Failed to init MEC BOs!\n");
+		return r;
+	}
+
+	/* set up the compute queues - allocate horizontally across pipes */
+	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
+		ring_id = 0;
+		for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+			for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+				for (k = 0; k < adev->gfx.mec.num_pipe_per_mec;
+				     k++) {
+					if (!amdgpu_gfx_is_mec_queue_enabled(
+							adev, xcc_id, i, k, j))
+						continue;
+
+					r = gfx_v9_4_3_compute_ring_init(adev,
+								       ring_id,
+								       xcc_id,
+								       i, k, j);
+					if (r)
+						return r;
+
+					ring_id++;
+				}
+			}
+		}
+
+		r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, xcc_id);
+		if (r) {
+			DRM_ERROR("Failed to init KIQ BOs!\n");
+			return r;
+		}
+
+		kiq = &adev->gfx.kiq[xcc_id];
+		r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id);
+		if (r)
+			return r;
+
+		/* create MQD for all compute queues as wel as KIQ for SRIOV case */
+		r = amdgpu_gfx_mqd_sw_init(adev,
+				sizeof(struct v9_mqd_allocation), xcc_id);
+		if (r)
+			return r;
+	}
+
+	r = gfx_v9_4_3_gpu_early_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_gfx_sysfs_init(adev);
+	if (r)
+		return r;
+
+	return amdgpu_gfx_ras_sw_init(adev);
+}
+
+static int gfx_v9_4_3_sw_fini(void *handle)
+{
+	int i, num_xcc;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
+		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+	for (i = 0; i < num_xcc; i++) {
+		amdgpu_gfx_mqd_sw_fini(adev, i);
+		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
+		amdgpu_gfx_kiq_fini(adev, i);
+	}
+
+	gfx_v9_4_3_mec_fini(adev);
+	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
+	gfx_v9_4_3_free_microcode(adev);
+	amdgpu_gfx_sysfs_fini(adev);
+
+	return 0;
+}
+
+#define DEFAULT_SH_MEM_BASES	(0x6000)
+static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev,
+					     int xcc_id)
+{
+	int i;
+	uint32_t sh_mem_config;
+	uint32_t sh_mem_bases;
+
+	/*
+	 * Configure apertures:
+	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
+	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
+	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
+	 */
+	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
+
+	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
+			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+	mutex_lock(&adev->srbm_mutex);
+	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+		soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+		/* CP and shaders */
+		WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, sh_mem_config);
+		WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases);
+	}
+	soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
+	mutex_unlock(&adev->srbm_mutex);
+
+	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
+	   acccess. These should be enabled by FW for target VMIDs. */
+	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * i, 0);
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * i, 0);
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, i, 0);
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, i, 0);
+	}
+}
+
+static void gfx_v9_4_3_xcc_init_gds_vmid(struct amdgpu_device *adev, int xcc_id)
+{
+	int vmid;
+
+	/*
+	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
+	 * the driver can enable them for graphics. VMID0 should maintain
+	 * access so that HWS firmware can save/restore entries.
+	 */
+	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_BASE, 2 * vmid, 0);
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_VMID0_SIZE, 2 * vmid, 0);
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_GWS_VMID0, vmid, 0);
+		WREG32_SOC15_OFFSET(GC, GET_INST(GC, xcc_id), regGDS_OA_VMID0, vmid, 0);
+	}
+}
+
+static void gfx_v9_4_3_xcc_constants_init(struct amdgpu_device *adev,
+					  int xcc_id)
+{
+	u32 tmp;
+	int i;
+
+	/* XXX SH_MEM regs */
+	/* where to put LDS, scratch, GPUVM in FSA64 space */
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
+		soc15_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
+		/* CP and shaders */
+		if (i == 0) {
+			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+					    !!adev->gmc.noretry);
+			WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+					 regSH_MEM_CONFIG, tmp);
+			WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+					 regSH_MEM_BASES, 0);
+		} else {
+			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
+			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
+					    !!adev->gmc.noretry);
+			WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+					 regSH_MEM_CONFIG, tmp);
+			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+					    (adev->gmc.private_aperture_start >>
+					     48));
+			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+					    (adev->gmc.shared_aperture_start >>
+					     48));
+			WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id),
+					 regSH_MEM_BASES, tmp);
+		}
+	}
+	soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, 0));
+
+	mutex_unlock(&adev->srbm_mutex);
+
+	gfx_v9_4_3_xcc_init_compute_vmid(adev, xcc_id);
+	gfx_v9_4_3_xcc_init_gds_vmid(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev)
+{
+	int i, num_xcc;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+
+	gfx_v9_4_3_get_cu_info(adev, &adev->gfx.cu_info);
+	adev->gfx.config.db_debug2 =
+		RREG32_SOC15(GC, GET_INST(GC, 0), regDB_DEBUG2);
+
+	for (i = 0; i < num_xcc; i++)
+		gfx_v9_4_3_xcc_constants_init(adev, i);
+}
+
+static void
+gfx_v9_4_3_xcc_enable_save_restore_machine(struct amdgpu_device *adev,
+					   int xcc_id)
+{
+	WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_SRM_CNTL, SRM_ENABLE, 1);
+}
+
+static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id)
+{
+	/*
+	 * Rlc save restore list is workable since v2_1.
+	 * And it's needed by gfxoff feature.
+	 */
+	if (adev->gfx.rlc.is_rlc_v2_1)
+		gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id);
+}
+
+static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id)
+{
+	uint32_t data;
+
+	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG);
+	data |= CPC_PSP_DEBUG__UTCL2IUGPAOVERRIDE_MASK;
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data);
+}
+
+static void gfx_v9_4_3_xcc_program_xcc_id(struct amdgpu_device *adev,
+					  int xcc_id)
+{
+	uint32_t tmp = 0;
+	int num_xcc;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	switch (num_xcc) {
+	/* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */
+	case 1:
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, 0x8);
+		break;
+	case 2:
+	case 4:
+	case 6:
+	case 8:
+		tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID);
+		tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP));
+		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, tmp);
+
+		break;
+	default:
+		break;