Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c')
 -rw-r--r--   drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 5299
 1 file changed, 5299 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
new file mode 100644
index 000000000000..f384be0d1800
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -0,0 +1,5299 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "amdgpu.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "amdgpu_atomfirmware.h"
+#include "imu_v12_0.h"
+#include "soc24.h"
+#include "nvd.h"
+
+#include "gc/gc_12_0_0_offset.h"
+#include "gc/gc_12_0_0_sh_mask.h"
+#include "soc24_enum.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
+
+#include "soc15.h"
+#include "soc15d.h"
+#include "clearstate_gfx12.h"
+#include "v12_structs.h"
+#include "gfx_v12_0.h"
+#include "nbif_v6_3_1.h"
+#include "mes_v12_0.h"
+
+#define GFX12_NUM_GFX_RINGS	1
+#define GFX12_MEC_HPD_SIZE	2048
+
+#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
+
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_0_toc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin");
+
+static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = {
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
+	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
+	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
+	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
+	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32),
+	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR),
+
+	/* cp header registers */
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
+	/* SE status registers */
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
+	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = {
+	/* compute registers */
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
+};
+
+static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = {
+	/* gfx queue registers */
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
+	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
+};
+
+#define DEFAULT_SH_MEM_CONFIG \
+	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
+	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
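[annotation] The macro above packs three SH_MEM_CONFIG fields: 64-bit address mode, unaligned alignment mode, and an initial instruction prefetch depth of 3. A minimal sketch of the same value built field by field with the driver's generic REG_SET_FIELD() helper, assuming the usual *_MASK/*__SHIFT definitions from gc_12_0_0_sh_mask.h; the local variable is hypothetical and this is not code from the commit:

	u32 sh_mem_config = 0;	/* hypothetical local; ends up equal to DEFAULT_SH_MEM_CONFIG */

	sh_mem_config = REG_SET_FIELD(sh_mem_config, SH_MEM_CONFIG,
				      ADDRESS_MODE, SH_MEM_ADDRESS_MODE_64);
	sh_mem_config = REG_SET_FIELD(sh_mem_config, SH_MEM_CONFIG,
				      ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config = REG_SET_FIELD(sh_mem_config, SH_MEM_CONFIG,
				      INITIAL_INST_PREFETCH, 3);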
+
+static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev);
+static void gfx_v12_0_set_ring_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_irq_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_rlc_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_mqd_funcs(struct amdgpu_device *adev);
+static void gfx_v12_0_set_imu_funcs(struct amdgpu_device *adev);
+static int gfx_v12_0_get_cu_info(struct amdgpu_device *adev,
+				 struct amdgpu_cu_info *cu_info);
+static uint64_t gfx_v12_0_get_gpu_clock_counter(struct amdgpu_device *adev);
+static void gfx_v12_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
+				   u32 sh_num, u32 instance, int xcc_id);
+static u32 gfx_v12_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
+
+static void gfx_v12_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
+static void gfx_v12_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
+				     uint32_t val);
+static int gfx_v12_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
+static void gfx_v12_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
+					   uint16_t pasid, uint32_t flush_type,
+					   bool all_hub, uint8_t dst_sel);
+static void gfx_v12_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
+static void gfx_v12_0_update_perf_clk(struct amdgpu_device *adev,
+				      bool enable);
+
+static void gfx_v12_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
+					uint64_t queue_mask)
+{
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
+	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
+			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
+	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
+	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
+	amdgpu_ring_write(kiq_ring, 0);
+}
+
+static void gfx_v12_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
+				     struct amdgpu_ring *ring)
+{
+	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+	uint64_t wptr_addr = ring->wptr_gpu_addr;
+	uint32_t me = 0, eng_sel = 0;
+
+	switch (ring->funcs->type) {
+	case AMDGPU_RING_TYPE_COMPUTE:
+		me = 1;
+		eng_sel = 0;
+		break;
+	case AMDGPU_RING_TYPE_GFX:
+		me = 0;
+		eng_sel = 4;
+		break;
+	case AMDGPU_RING_TYPE_MES:
+		me = 2;
+		eng_sel = 5;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
+	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
+			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
+			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
+			  PACKET3_MAP_QUEUES_ME((me)) |
+			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
+			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
+			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
+			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
+	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+}
+
+static void gfx_v12_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
+				       struct amdgpu_ring *ring,
+				       enum amdgpu_unmap_queues_action action,
+				       u64 gpu_addr, u64 seq)
+{
+	struct amdgpu_device *adev = kiq_ring->adev;
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
+		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
+		return;
+	}
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			  PACKET3_UNMAP_QUEUES_ACTION(action) |
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
+			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
+			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+
+	if (action == PREEMPT_QUEUES_NO_UNMAP) {
+		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
+		amdgpu_ring_write(kiq_ring, seq);
+	} else {
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+	}
+}
+
+static void gfx_v12_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
+				       struct amdgpu_ring *ring,
+				       u64 addr, u64 seq)
+{
+	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
+
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
+			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
+			  PACKET3_QUERY_STATUS_COMMAND(2));
+	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
+			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
+	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
+	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+}
+
+static void gfx_v12_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
+					  uint16_t pasid,
+					  uint32_t flush_type,
+					  bool all_hub)
+{
+	gfx_v12_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
+}
+
+static const struct kiq_pm4_funcs gfx_v12_0_kiq_pm4_funcs = {
+	.kiq_set_resources = gfx_v12_0_kiq_set_resources,
+	.kiq_map_queues = gfx_v12_0_kiq_map_queues,
+	.kiq_unmap_queues = gfx_v12_0_kiq_unmap_queues,
+	.kiq_query_status = gfx_v12_0_kiq_query_status,
+	.kiq_invalidate_tlbs = gfx_v12_0_kiq_invalidate_tlbs,
+	.set_resources_size = 8,
+	.map_queues_size = 7,
+	.unmap_queues_size = 6,
+	.query_status_size = 7,
+	.invalidate_tlbs_size = 2,
+};
+
+static void gfx_v12_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
+{
+	adev->gfx.kiq[0].pmf = &gfx_v12_0_kiq_pm4_funcs;
+}
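[annotation] Each *_size field in gfx_v12_0_kiq_pm4_funcs is the total dword footprint of the matching emit helper: one PACKET3 header plus its body, where PACKET3(op, n) encodes n as body dwords minus one. For example, kiq_set_resources() writes PACKET3(PACKET3_SET_RESOURCES, 6) followed by seven body dwords, matching set_resources_size = 8. A hypothetical caller sketch showing how such a budget would be reserved before emitting (example_kiq_map_queue() is illustrative, not part of this file):

static int example_kiq_map_queue(struct amdgpu_device *adev,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	int r;

	/* reserve exactly the advertised dword budget on the KIQ ring */
	r = amdgpu_ring_alloc(&kiq->ring, kiq->pmf->map_queues_size);
	if (r)
		return r;

	kiq->pmf->kiq_map_queues(&kiq->ring, ring);
	amdgpu_ring_commit(&kiq->ring);
	return 0;
}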
+
+static void gfx_v12_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
+				   int mem_space, int opt, uint32_t addr0,
+				   uint32_t addr1, uint32_t ref,
+				   uint32_t mask, uint32_t inv)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+	amdgpu_ring_write(ring,
+			  /* memory (1) or register (0) */
+			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
+			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
+			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
+			   WAIT_REG_MEM_ENGINE(eng_sel)));
+
+	if (mem_space)
+		BUG_ON(addr0 & 0x3); /* Dword align */
+	amdgpu_ring_write(ring, addr0);
+	amdgpu_ring_write(ring, addr1);
+	amdgpu_ring_write(ring, ref);
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring, inv); /* poll interval */
+}
+
+static int gfx_v12_0_ring_test_ring(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	WREG32(scratch, 0xCAFEDEAD);
+	r = amdgpu_ring_alloc(ring, 5);
+	if (r) {
+		dev_err(adev->dev,
+			"amdgpu: cp failed to lock ring %d (%d).\n",
+			ring->idx, r);
+		return r;
+	}
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
+		gfx_v12_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
+	} else {
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+		amdgpu_ring_write(ring, scratch -
+				  PACKET3_SET_UCONFIG_REG_START);
+		amdgpu_ring_write(ring, 0xDEADBEEF);
+	}
+	amdgpu_ring_commit(ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		if (amdgpu_emu_mode == 1)
+			msleep(1);
+		else
+			udelay(1);
+	}
+
+	if (i >= adev->usec_timeout)
+		r = -ETIMEDOUT;
+	return r;
+}
+
+static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ib ib;
+	struct dma_fence *f = NULL;
+	unsigned index;
+	uint64_t gpu_addr;
+	volatile uint32_t *cpu_ptr;
+	long r;
+
+	/* MES KIQ fw doesn't have indirect buffer support yet */
+	if (adev->enable_mes_kiq &&
+	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+		return 0;
+
+	memset(&ib, 0, sizeof(ib));
+
+	if (ring->is_mes_queue) {
+		uint32_t padding, offset;
+
+		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+		padding = amdgpu_mes_ctx_get_offs(ring,
+						  AMDGPU_MES_CTX_PADDING_OFFS);
+
+		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
+		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
+		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
+	} else {
+		r = amdgpu_device_wb_get(adev, &index);
+		if (r)
+			return r;
+
+		gpu_addr = adev->wb.gpu_addr + (index * 4);
+		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+		cpu_ptr = &adev->wb.wb[index];
+
+		r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
+		if (r) {
+			dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
+			goto err1;
+		}
+	}
+
+	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+	ib.ptr[2] = lower_32_bits(gpu_addr);
+	ib.ptr[3] = upper_32_bits(gpu_addr);
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = 5;
+
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r)
+		goto err2;
+
+	r = dma_fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		r = -ETIMEDOUT;
+		goto err2;
+	} else if (r < 0) {
+		goto err2;
+	}
+
+	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
+		r = 0;
+	else
+		r = -EINVAL;
+err2:
+	if (!ring->is_mes_queue)
+		amdgpu_ib_free(adev, &ib, NULL);
+	dma_fence_put(f);
+err1:
+	if (!ring->is_mes_queue)
+		amdgpu_device_wb_free(adev, index);
+	return r;
+}
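[annotation] Both smoke tests above use the same seed-and-poll pattern: software seeds a location with 0xCAFEDEAD, asks the engine to overwrite it with 0xDEADBEEF, then polls for the new value. For reference, the 5-dword test IB built in gfx_v12_0_ring_test_ib() decodes as follows (descriptive comment only, not part of the commit):

	/*
	 * dw0: PACKET3(PACKET3_WRITE_DATA, 3)      header; 4 body dwords follow
	 * dw1: WRITE_DATA_DST_SEL(5) | WR_CONFIRM  dst = memory, wait for write confirm
	 * dw2: lower_32_bits(gpu_addr)             write-back slot, low half
	 * dw3: upper_32_bits(gpu_addr)             write-back slot, high half
	 * dw4: 0xDEADBEEF                          value the CPU-side poll expects
	 */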
+
+static void gfx_v12_0_free_microcode(struct amdgpu_device *adev)
+{
+	amdgpu_ucode_release(&adev->gfx.pfp_fw);
+	amdgpu_ucode_release(&adev->gfx.me_fw);
+	amdgpu_ucode_release(&adev->gfx.rlc_fw);
+	amdgpu_ucode_release(&adev->gfx.mec_fw);
+
+	kfree(adev->gfx.rlc.register_list_format);
+}
+
+static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
+{
+	const struct psp_firmware_header_v1_0 *toc_hdr;
+	int err = 0;
+
+	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
+				   "amdgpu/%s_toc.bin", ucode_prefix);
+	if (err)
+		goto out;
+
+	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
+	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
+	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
+	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
+	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
+		le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
+	return 0;
+out:
+	amdgpu_ucode_release(&adev->psp.toc_fw);
+	return err;
+}
+
+static int gfx_v12_0_init_microcode(struct amdgpu_device *adev)
+{
+	char ucode_prefix[15];
+	int err;
+	const struct rlc_firmware_header_v2_0 *rlc_hdr;
+	uint16_t version_major;
+	uint16_t version_minor;
+
+	DRM_DEBUG("\n");
+
+	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
+				   "amdgpu/%s_pfp.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
+				   "amdgpu/%s_me.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
+
+	if (!amdgpu_sriov_vf(adev)) {
+		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
+					   "amdgpu/%s_rlc.bin", ucode_prefix);
+		if (err)
+			goto out;
+		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
+		if (err)
+			goto out;
+	}
+
+	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
+				   "amdgpu/%s_mec.bin", ucode_prefix);
+	if (err)
+		goto out;
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
+	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
+		err = gfx_v12_0_init_toc_microcode(adev, ucode_prefix);
+
+	/* only one MEC for gfx 12 */
+	adev->gfx.mec2_fw = NULL;
+
+	if (adev->gfx.imu.funcs) {
+		if (adev->gfx.imu.funcs->init_microcode) {
+			err = adev->gfx.imu.funcs->init_microcode(adev);
+			if (err)
+				dev_err(adev->dev, "Failed to load imu firmware!\n");
+		}
+	}
+
+out:
+	if (err) {
+		amdgpu_ucode_release(&adev->gfx.pfp_fw);
+		amdgpu_ucode_release(&adev->gfx.me_fw);
+		amdgpu_ucode_release(&adev->gfx.rlc_fw);
+		amdgpu_ucode_release(&adev->gfx.mec_fw);
+	}
+
+	return err;
+}
+
+static u32 gfx_v12_0_get_csb_size(struct amdgpu_device *adev)
+{
+	u32 count = 0;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	count += 1;
+
+	for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
+		if (sect->id == SECT_CONTEXT) {
+			for (ext = sect->section; ext->extent != NULL; ++ext)
+				count += 2 + ext->reg_count;
+		} else
+			return 0;
+	}
+
+	return count;
+}
+
+static void gfx_v12_0_get_csb_buffer(struct amdgpu_device *adev,
+				     volatile u32 *buffer)
+{
+	u32 count = 0, clustercount = 0, i;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	if (adev->gfx.rlc.cs_data == NULL)
+		return;
+	if (buffer == NULL)
+		return;
+
+	count += 1;
+
+	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
+		if (sect->id == SECT_CONTEXT) {
+			for (ext = sect->section; ext->extent != NULL; ++ext) {
+				clustercount++;
+				buffer[count++] = ext->reg_count;
+				buffer[count++] = ext->reg_index;
+
+				for (i = 0; i < ext->reg_count; i++)
+					buffer[count++] = cpu_to_le32(ext->extent[i]);
+			}
+		} else
+			return;
+	}
+
+	buffer[0] = clustercount;
+}
+
+static void gfx_v12_0_rlc_fini(struct amdgpu_device *adev)
+{
+	/* clear state block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
+			      &adev->gfx.rlc.clear_state_gpu_addr,
+			      (void **)&adev->gfx.rlc.cs_ptr);
+
+	/* jump table block */
+	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
+			      &adev->gfx.rlc.cp_table_gpu_addr,
+			      (void **)&adev->gfx.rlc.cp_table_ptr);
+}
+
+static void gfx_v12_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
+{
+	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
+
+	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
+	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
+	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
+	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
+	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
+	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
+	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
+	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
+	adev->gfx.rlc.rlcg_reg_access_supported = true;
+}
+
+static int gfx_v12_0_rlc_init(struct amdgpu_device *adev)
+{
+	const struct cs_section_def *cs_data;
+	int r;
+
+	adev->gfx.rlc.cs_data = gfx12_cs_data;
+
+	cs_data = adev->gfx.rlc.cs_data;
+
+	if (cs_data) {
+		/* init clear state block */
+		r = amdgpu_gfx_rlc_init_csb(adev);
+		if (r)
+			return r;
+	}
+
+	/* init spm vmid with 0xf */
+	if (adev->gfx.rlc.funcs->update_spm_vmid)
+		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
+
+	return 0;
+}
+
+static void gfx_v12_0_mec_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
+	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
+	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
+}
+
+static void gfx_v12_0_me_init(struct amdgpu_device *adev)
+{
+	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
+
+	amdgpu_gfx_graphics_queue_acquire(adev);
+}
+
+static int gfx_v12_0_mec_init(struct amdgpu_device *adev)
+{
+	int r;
+	u32 *hpd;
+	size_t mec_hpd_size;
+
+	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX12_MEC_HPD_SIZE;
+
+	if (mec_hpd_size) {
+		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+					      AMDGPU_GEM_DOMAIN_GTT,
+					      &adev->gfx.mec.hpd_eop_obj,
+					      &adev->gfx.mec.hpd_eop_gpu_addr,
+					      (void **)&hpd);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
+			gfx_v12_0_mec_fini(adev);
+			return r;
+		}
+
+		memset(hpd, 0, mec_hpd_size);
+
+		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+	}
+
+	return 0;
+}
+
+static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
+{
+	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		     (address << SQ_IND_INDEX__INDEX__SHIFT));
+	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
+
+static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
+			   uint32_t thread, uint32_t regno,
+			   uint32_t num, uint32_t *out)
+{
+	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
+		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
+		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
+		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
+		     (SQ_IND_INDEX__AUTO_INCR_MASK));
+	while (num--)
+		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
+}
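[annotation] Wave state is reached through the SQ indirect register pair shown above: a write to SQ_IND_INDEX selects the wave, the register index, and (for per-thread data) the work-item, and reads of SQ_IND_DATA return the values; the AUTO_INCR bit makes consecutive SQ_IND_DATA reads step through a register range. A hypothetical caller, assuming the GRBM SE/SH/instance selection has already been done via the select_se_sh callback (variables here are illustrative):

	uint32_t status, sgprs[16];

	/* single indirect register read for one wave */
	status = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);

	/* bulk read: the first 16 SGPRs of the same wave, thread 0,
	 * relying on AUTO_INCR to advance the index */
	wave_read_regs(adev, wave, 0, SQIND_WAVE_SGPRS_OFFSET, 16, sgprs);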
+
+static void gfx_v12_0_read_wave_data(struct amdgpu_device *adev,
+				     uint32_t xcc_id,
+				     uint32_t simd, uint32_t wave,
+				     uint32_t *dst, int *no_fields)
+{
+	/* in gfx12 the SIMD_ID is specified as part of the INSTANCE
+	 * field when performing a select_se_sh so it should be
+	 * zero here */
+	WARN_ON(simd != 0);
+
+	/* type 4 wave data */
+	dst[(*no_fields)++] = 4;
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATE_PRIV);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXCP_FLAG_USER);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAP_CTRL);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_ACTIVE);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_VALID_AND_IDLE);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_SCHED_MODE);
+}
+
+static void gfx_v12_0_read_wave_sgprs(struct amdgpu_device *adev,
+				      uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t start,
+				      uint32_t size, uint32_t *dst)
+{
+	WARN_ON(simd != 0);
+
+	wave_read_regs(
+		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
+		dst);
+}
+
+static void gfx_v12_0_read_wave_vgprs(struct amdgpu_device *adev,
+				      uint32_t xcc_id, uint32_t simd,
+				      uint32_t wave, uint32_t thread,
+				      uint32_t start, uint32_t size,
+				      uint32_t *dst)
+{
+	wave_read_regs(
+		adev, wave, thread,
+		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
+}
+
+static void gfx_v12_0_select_me_pipe_q(struct amdgpu_device *adev,
+					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
+{
+	soc24_grbm_select(adev, me, pipe, q, vm);
+}
+
+static const struct amdgpu_gfx_funcs gfx_v12_0_gfx_funcs = {
+	.get_gpu_clock_counter = &gfx_v12_0_get_gpu_clock_counter,
+	.select_se_sh = &gfx_v12_0_select_se_sh,
+	.read_wave_data = &gfx_v12_0_read_wave_data,
+	.read_wave_sgprs = &gfx_v12_0_read_wave_sgprs,
+	.read_wave_vgprs = &gfx_v12_0_read_wave_vgprs,
+	.select_me_pipe_q = &gfx_v12_0_select_me_pipe_q,
+	.update_perfmon_mgcg = &gfx_v12_0_update_perf_clk,
+};
+
+static int gfx_v12_0_gpu_early_init(struct amdgpu_device *adev)
+{
+	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+	case IP_VERSION(12, 0, 0):
+	case IP_VERSION(12, 0, 1):
+		adev->gfx.config.max_hw_contexts = 8;
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	return 0;
+}
+
+static int gfx_v12_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
+				   int me, int pipe, int queue)
+{
+	int r;
+	struct amdgpu_ring *ring;
+	unsigned int irq_type;
+
+	ring = &adev->gfx.gfx_ring[ring_id];
+
+	ring->me = me;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+
+	if (!ring_id)
+		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+	else
+		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+			     AMDGPU_RING_PRIO_DEFAULT, NULL);
+	if (r)
+		return r;
+	return 0;
+}
+
+static int gfx_v12_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring;
+	unsigned int hw_prio;
+
+	ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
+			     (ring_id * GFX12_MEC_HPD_SIZE);
+	ring->vm_hub = AMDGPU_GFXHUB(0);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		   + ring->pipe;
+	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
+		  AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
+			     hw_prio, NULL);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+static struct {
+	SOC24_FIRMWARE_ID	id;
+	unsigned int		offset;
+	unsigned int		size;
+	unsigned int		size_x16;
+} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
+
+#define RLC_TOC_OFFSET_DWUNIT	8
+#define RLC_SIZE_MULTIPLE	1024
+#define RLC_TOC_UMF_SIZE_inM	23ULL
+#define RLC_TOC_FORMAT_API	165ULL
+
+static void gfx_v12_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
+{
+	RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
+
+	while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
+		rlc_autoload_info[ucode->id].id = ucode->id;
+		rlc_autoload_info[ucode->id].offset =
+			ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
+		rlc_autoload_info[ucode->id].size =
+			ucode->size_x16 ?
+			ucode->size * RLC_SIZE_MULTIPLE * 4 :
+			ucode->size * 4;
+		ucode++;
+	}
+}
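[annotation] The TOC unit conversions in gfx_v12_0_parse_rlc_toc() are easy to misread: an entry's offset is stored in RLC_TOC_OFFSET_DWUNIT (8-dword) units, and its size is in dwords, or in RLC_SIZE_MULTIPLE (1024-dword) chunks when size_x16 is set. A worked example with made-up values:

	/*
	 * offset = 0x40            ->  0x40 * 8 * 4     = 0x800 bytes (2 KiB)
	 * size = 100, size_x16 = 0 ->  100 * 4          = 400 bytes
	 * size = 2,   size_x16 = 1 ->  2 * 1024 * 4     = 8 KiB
	 */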
+
+static uint32_t gfx_v12_0_calc_toc_total_size(struct amdgpu_device *adev)
+{
+	uint32_t total_size = 0;
+	SOC24_FIRMWARE_ID id;
+
+	gfx_v12_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
+
+	for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
+		total_size += rlc_autoload_info[id].size;
+
+	/* In case the offset in rlc toc ucode is aligned */
+	if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
+		total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
+			rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
+	if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
+		total_size = RLC_TOC_UMF_SIZE_inM << 20;
+
+	return total_size;
+}
+
+static int gfx_v12_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
+{
+	int r;
+	uint32_t total_size;
+
+	total_size = gfx_v12_0_calc_toc_total_size(adev);
+
+	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->gfx.rlc.rlc_autoload_bo,
+				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
+				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);