diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2019-07-04 10:20:48 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2019-07-04 10:20:48 +0200 |
| commit | a94de2e7a380064a6e8a4c3e5e10d97e8aa711f4 (patch) | |
| tree | 1026bbbd6ccc8241fe58bd3742353c647b459c5c /drivers | |
| parent | 60e8523e2ea18dc0c0cea69d6c1d69a065019062 (diff) | |
| parent | e8960ca06bb22d0d84edf246b0bf395e8322e127 (diff) | |
| download | linux-a94de2e7a380064a6e8a4c3e5e10d97e8aa711f4.tar.gz linux-a94de2e7a380064a6e8a4c3e5e10d97e8aa711f4.tar.bz2 linux-a94de2e7a380064a6e8a4c3e5e10d97e8aa711f4.zip | |
Merge tag 'misc-habanalabs-next-2019-07-04' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes:
This tag contains the following changes for kernel 5.3:
- Change the way the device's CPU access the host memory. This allows the
driver to use the kernel API of setting DMA mask in a standard way (call
it once).
- Add a new debugfs entry to show the status of the internal DMA and
compute engines. This is very helpful for debugging in case a command
submission get stuck.
- Return to the user a mask of the internal engines indicating their busy
state.
- Make sure to restore registers that can be modified by the user to their
default values. Only applies to registers that are initialized by the
driver.
- Elimination of redundant and dead-code.
- Support memset of the device's memory with size larger then 4GB
- Force the user to set the device to debug mode before configuring the
device's coresight infrastructure
- Improve error printing in case of interrupts from the device
* tag 'misc-habanalabs-next-2019-07-04' of git://people.freedesktop.org/~gabbayo/linux: (31 commits)
habanalabs: Add busy engines bitmask to HW idle IOCTL
habanalabs: Add debugfs node for engines status
habanalabs: Update the device idle check
habanalabs: Allow accessing host mapped addresses via debugfs
habanalabs: add WARN in case of bad MMU mapping
habanalabs: remove DMA mask hack for Goya
habanalabs: set Goya CPU to use ASIC MMU
habanalabs: add MMU mappings for Goya CPU
habanalabs: initialize MMU context for driver
habanalabs: de-couple MMU and VM module initialization
habanalabs: initialize device CPU queues after MMU init
docs/habanalabs: update text for some entries in sysfs
habanalabs: add rate-limit to an error message
habanalabs: remove simulator dedicated code
habanalabs: restore unsecured registers default values
habanalabs: clear sobs and monitors in context switch
habanalabs: make tpc registers secured
habanalabs: don't limit packet size for device CPU
habanalabs: support device memory memset > 4GB
habanalabs: print event name for fatal and non-RAZWI events
...
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/misc/habanalabs/asid.c | 2 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/command_submission.c | 10 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/context.c | 11 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/debugfs.c | 54 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/device.c | 189 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/firmware_if.c | 51 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 635 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/goya/goyaP.h | 16 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/goya/goya_security.c | 16 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 93 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/habanalabs_drv.c | 66 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/habanalabs_ioctl.c | 11 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/hw_queue.c | 2 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h | 418 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h | 1 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/memory.c | 13 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/mmu.c | 20 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/pci.c | 10 | ||||
| -rw-r--r-- | drivers/misc/habanalabs/sysfs.c | 4 |
19 files changed, 1181 insertions, 441 deletions
diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c index f54e7971a762..2c01461701a3 100644 --- a/drivers/misc/habanalabs/asid.c +++ b/drivers/misc/habanalabs/asid.c @@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev) mutex_init(&hdev->asid_mutex); - /* ASID 0 is reserved for KMD */ + /* ASID 0 is reserved for KMD and device CPU */ set_bit(0, hdev->asid_bitmap); return 0; diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 6fe785e26859..6ad83d5ef4b0 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -682,14 +682,12 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) u32 tmp; rc = hl_poll_timeout_memory(hdev, - (u64) (uintptr_t) &ctx->thread_ctx_switch_wait_token, - jiffies_to_usecs(hdev->timeout_jiffies), - &tmp); + &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), + 100, jiffies_to_usecs(hdev->timeout_jiffies)); - if (rc || !tmp) { + if (rc == -ETIMEDOUT) { dev_err(hdev->dev, - "context switch phase didn't finish in time\n"); - rc = -ETIMEDOUT; + "context switch phase timeout (%d)\n", tmp); goto out; } } diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c index f4c92f110a72..8682590e3f6e 100644 --- a/drivers/misc/habanalabs/context.c +++ b/drivers/misc/habanalabs/context.c @@ -31,9 +31,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx) * Coresight might be still working by accessing addresses * related to the stopped engines. Hence stop it explicitly. */ - hdev->asic_funcs->halt_coresight(hdev); + if (hdev->in_debug) + hl_device_set_debug_mode(hdev, false); + hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); + } else { + hl_mmu_ctx_fini(ctx); } } @@ -117,6 +121,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) if (is_kernel_ctx) { ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */ + rc = hl_mmu_ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "Failed to init mmu ctx module\n"); + goto mem_ctx_err; + } } else { ctx->asid = hl_asid_alloc(hdev); if (!ctx->asid) { diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index ba418aaa404c..18e499c900c7 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -355,7 +355,7 @@ static int mmu_show(struct seq_file *s, void *data) struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - struct hl_ctx *ctx = hdev->user_ctx; + struct hl_ctx *ctx; u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, @@ -367,6 +367,11 @@ static int mmu_show(struct seq_file *s, void *data) if (!hdev->mmu_enable) return 0; + if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) + ctx = hdev->kernel_ctx; + else + ctx = hdev->user_ctx; + if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return 0; @@ -495,6 +500,36 @@ err: return -EINVAL; } +static int engines_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + hdev->asic_funcs->is_device_idle(hdev, NULL, s); + + return 0; +} + +static bool hl_is_device_va(struct hl_device *hdev, u64 addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (!hdev->mmu_enable) + goto out; + + if (hdev->dram_supports_virtual_memory && + addr >= prop->va_space_dram_start_address && + addr < prop->va_space_dram_end_address) + return true; + + if (addr >= prop->va_space_host_start_address && + addr < prop->va_space_host_end_address) + return true; +out: + return false; +} + static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u64 *phys_addr) { @@ -568,7 +603,6 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; char tmp_buf[32]; u64 addr = entry->addr; u32 val; @@ -577,11 +611,8 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, if (*ppos) return 0; - if (addr >= prop->va_space_dram_start_address && - addr < prop->va_space_dram_end_address && - hdev->mmu_enable && - hdev->dram_supports_virtual_memory) { - rc = device_va_to_pa(hdev, entry->addr, &addr); + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); if (rc) return rc; } @@ -602,7 +633,6 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; u64 addr = entry->addr; u32 value; ssize_t rc; @@ -611,11 +641,8 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, if (rc) return rc; - if (addr >= prop->va_space_dram_start_address && - addr < prop->va_space_dram_end_address && - hdev->mmu_enable && - hdev->dram_supports_virtual_memory) { - rc = device_va_to_pa(hdev, entry->addr, &addr); + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); if (rc) return rc; } @@ -877,6 +904,7 @@ static const struct hl_info_list hl_debugfs_list[] = { {"userptr", userptr_show, NULL}, {"vm", vm_show, NULL}, {"mmu", mmu_show, mmu_write}, + {"engines", engines_show, NULL} }; static int hl_debugfs_open(struct inode *inode, struct file *file) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 0b19d3eefb98..0c4894dd9c02 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev) mutex_init(&hdev->fd_open_cnt_lock); mutex_init(&hdev->send_cpu_message_lock); + mutex_init(&hdev->debug_lock); mutex_init(&hdev->mmu_cache_lock); INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); spin_lock_init(&hdev->hw_queues_mirror_lock); @@ -262,6 +263,7 @@ early_fini: static void device_early_fini(struct hl_device *hdev) { mutex_destroy(&hdev->mmu_cache_lock); + mutex_destroy(&hdev->debug_lock); mutex_destroy(&hdev->send_cpu_message_lock); hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); @@ -324,7 +326,15 @@ static int device_late_init(struct hl_device *hdev) { int rc; - INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job); + if (hdev->asic_funcs->late_init) { + rc = hdev->asic_funcs->late_init(hdev); + if (rc) { + dev_err(hdev->dev, + "failed late initialization for the H/W\n"); + return rc; + } + } + hdev->high_pll = hdev->asic_prop.high_pll; /* force setting to low frequency */ @@ -335,17 +345,9 @@ static int device_late_init(struct hl_device *hdev) else hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); - if (hdev->asic_funcs->late_init) { - rc = hdev->asic_funcs->late_init(hdev); - if (rc) { - dev_err(hdev->dev, - "failed late initialization for the H/W\n"); - return rc; - } - } - + INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job); schedule_delayed_work(&hdev->work_freq, - usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); + usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); if (hdev->heartbeat) { INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); @@ -420,6 +422,52 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) return 1; } +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) +{ + int rc = 0; + + mutex_lock(&hdev->debug_lock); + + if (!enable) { + if (!hdev->in_debug) { + dev_err(hdev->dev, + "Failed to disable debug mode because device was not in debug mode\n"); + rc = -EFAULT; + goto out; + } + + hdev->asic_funcs->halt_coresight(hdev); + hdev->in_debug = 0; + + goto out; + } + + if (hdev->in_debug) { + dev_err(hdev->dev, + "Failed to enable debug mode because device is already in debug mode\n"); + rc = -EFAULT; + goto out; + } + + mutex_lock(&hdev->fd_open_cnt_lock); + + if (atomic_read(&hdev->fd_open_cnt) > 1) { + dev_err(hdev->dev, + "Failed to enable debug mode. More then a single user is using the device\n"); + rc = -EPERM; + goto unlock_fd_open_lock; + } + + hdev->in_debug = 1; + +unlock_fd_open_lock: + mutex_unlock(&hdev->fd_open_cnt_lock); +out: + mutex_unlock(&hdev->debug_lock); + + return rc; +} + /* * hl_device_suspend - initiate device suspend * @@ -647,13 +695,6 @@ again: hdev->hard_reset_pending = true; - if (!hdev->pdev) { - dev_err(hdev->dev, - "Reset action is NOT supported in simulator\n"); - rc = -EINVAL; - goto out_err; - } - device_reset_work = kzalloc(sizeof(*device_reset_work), GFP_ATOMIC); if (!device_reset_work) { @@ -704,6 +745,7 @@ again: if (hard_reset) { hl_vm_fini(hdev); + hl_mmu_fini(hdev); hl_eq_reset(hdev, &hdev->event_queue); } @@ -731,6 +773,13 @@ again: goto out_err; } + rc = hl_mmu_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to initialize MMU S/W after hard reset\n"); + goto out_err; + } + /* Allocate the kernel context */ hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); @@ -902,11 +951,18 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) goto cq_fini; } + /* MMU S/W must be initialized before kernel context is created */ + rc = hl_mmu_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); + goto eq_fini; + } + /* Allocate the kernel context */ hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); if (!hdev->kernel_ctx) { rc = -ENOMEM; - goto eq_fini; + goto mmu_fini; } hdev->user_ctx = NULL; @@ -954,8 +1010,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) goto out_disabled; } - /* After test_queues, KMD can start sending messages to device CPU */ - rc = device_late_init(hdev); if (rc) { dev_err(hdev->dev, "Failed late initialization\n"); @@ -1001,6 +1055,8 @@ release_ctx: "kernel ctx is still alive on initialization failure\n"); free_ctx: kfree(hdev->kernel_ctx); +mmu_fini: + hl_mmu_fini(hdev); eq_fini: hl_eq_fini(hdev, &hdev->event_queue); cq_fini: @@ -1105,6 +1161,8 @@ void hl_device_fini(struct hl_device *hdev) hl_vm_fini(hdev); + hl_mmu_fini(hdev); + hl_eq_fini(hdev, &hdev->event_queue); for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) @@ -1126,95 +1184,6 @@ void hl_device_fini(struct hl_device *hdev) } /* - * hl_poll_timeout_memory - Periodically poll a host memory address - * until it is not zero or a timeout occurs - * @hdev: pointer to habanalabs device structure - * @addr: Address to poll - * @timeout_us: timeout in us - * @val: Variable to read the value into - * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * - * The function sleeps for 100us with timeout value of - * timeout_us - */ -int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, - u32 timeout_us, u32 *val) -{ - /* - * address in this function points always to a memory location in the - * host's (server's) memory. That location is updated asynchronously - * either by the direct access of the device or by another core - */ - u32 *paddr = (u32 *) (uintptr_t) addr; - ktime_t timeout; - - /* timeout should be longer when working with simulator */ - if (!hdev->pdev) - timeout_us *= 10; - - timeout = ktime_add_us(ktime_get(), timeout_us); - - might_sleep(); - - for (;;) { - /* - * Flush CPU read/write buffers to make sure we read updates - * done by other cores or by the device - */ - mb(); - *val = *paddr; - if (*val) - break; - if (ktime_compare(ktime_get(), timeout) > 0) { - *val = *paddr; - break; - } - usleep_range((100 >> 2) + 1, 100); - } - - return *val ? 0 : -ETIMEDOUT; -} - -/* - * hl_poll_timeout_devicememory - Periodically poll a device memory address - * until it is not zero or a timeout occurs - * @hdev: pointer to habanalabs device structure - * @addr: Device address to poll - * @timeout_us: timeout in us - * @val: Variable to read the value into - * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * - * The function sleeps for 100us with timeout value of - * timeout_us - */ -int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr, - u32 timeout_us, u32 *val) -{ - ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); - - might_sleep(); - - for (;;) { - *val = readl(addr); - if (*val) - break; - if (ktime_compare(ktime_get(), timeout) > 0) { - *val = readl(addr); - break; - } - usleep_range((100 >> 2) + 1, 100); - } - - return *val ? 0 : -ETIMEDOUT; -} - -/* * MMIO register access helper functions. */ diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index eda5d7fcb79f..cc8168bacb24 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -29,13 +29,13 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name, rc = request_firmware(&fw, fw_name, hdev->dev); if (rc) { - dev_err(hdev->dev, "Failed to request %s\n", fw_name); + dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name); goto out; } fw_size = fw->size; if ((fw_size % 4) != 0) { - dev_err(hdev->dev, "illegal %s firmware size %zu\n", + dev_err(hdev->dev, "Illegal %s firmware size %zu\n", fw_name, fw_size); rc = -EINVAL; goto out; @@ -85,12 +85,6 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, u32 tmp; int rc = 0; - if (len > HL_CPU_CB_SIZE) { - dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n", - len); - return -ENOMEM; - } - pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); if (!pkt) { @@ -117,33 +111,28 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, goto out; } - rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence, - timeout, &tmp); + rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, + (tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); if (rc == -ETIMEDOUT) { - dev_err(hdev->dev, "Timeout while waiting for device CPU\n"); + dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp); hdev->device_cpu_disabled = true; goto out; } - if (tmp == ARMCP_PACKET_FENCE_VAL) { - u32 ctl = le32_to_cpu(pkt->ctl); + tmp = le32_to_cpu(pkt->ctl); - rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; - if (rc) { - dev_err(hdev->dev, - "F/W ERROR %d for CPU packet %d\n", - rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK) + rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; + if (rc) { + dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n", + rc, + (tmp & ARMCP_PKT_CTL_OPCODE_MASK) >> ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = -EINVAL; - } else if (result) { - *result = (long) le64_to_cpu(pkt->result); - } - } else { - dev_err(hdev->dev, "CPU packet wrong fence value\n"); - rc = -EINVAL; + rc = -EIO; + } else if (result) { + *result = (long) le64_to_cpu(pkt->result); } out: @@ -186,9 +175,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, { u64 kernel_addr; - /* roundup to HL_CPU_PKT_SIZE */ - size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK; - kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); *dma_handle = hdev->cpu_accessible_dma_address + @@ -200,9 +186,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) { - /* roundup to HL_CPU_PKT_SIZE */ - size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK; - gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, size); } @@ -256,7 +239,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev) HL_ARMCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, - "Failed to send armcp info pkt, error %d\n", rc); + "Failed to send ArmCP info pkt, error %d\n", rc); goto out; } @@ -291,7 +274,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) max_size, &eeprom_info_dma_addr); if (!eeprom_info_cpu_addr) { dev_err(hdev->dev, - "Failed to allocate DMA memory for EEPROM info packet\n"); + "Failed to allocate DMA memory for ArmCP EEPROM packet\n"); return -ENOMEM; } @@ -307,7 +290,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) if (rc) { dev_err(hdev->dev, - "Failed to send armcp EEPROM pkt, error %d\n", rc); + "Failed to send ArmCP EEPROM packet, error %d\n", rc); goto out; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 02d116b01a1a..75294ec65257 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -14,6 +14,8 @@ #include <linux/genalloc.h> #include <linux/hwmon.h> #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/iommu.h> +#include <linux/seq_file.h> /* * GOYA security scheme: @@ -89,6 +91,30 @@ #define GOYA_CB_POOL_CB_CNT 512 #define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */ +#define IS_QM_IDLE(engine, qm_glbl_sts0) \ + (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK) +#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0) +#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0) +#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0) + +#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \ + (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \ + engine##_CMDQ_IDLE_MASK) +#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \ + IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0) +#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \ + IS_CMDQ_IDLE(MME, cmdq_glbl_sts0) + +#define IS_DMA_IDLE(dma_core_sts0) \ + !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK) + +#define IS_TPC_IDLE(tpc_cfg_sts) \ + (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK) + +#define IS_MME_IDLE(mme_arch_sts) \ + (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) + + static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", "goya cq 4", "goya cpu eq" @@ -297,6 +323,11 @@ static u32 goya_all_events[] = { GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 }; +static int goya_mmu_clear_pgt_range(struct hl_device *hdev); +static int goya_mmu_set_dram_default_page(struct hl_device *hdev); +static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev); +static void goya_mmu_prepare(struct hl_device *hdev, u32 asid); + void goya_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -467,7 +498,7 @@ static int goya_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); - rc = hl_pci_init(hdev, 39); + rc = hl_pci_init(hdev, 48); if (rc) return rc; @@ -539,9 +570,36 @@ int goya_late_init(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; + goya_fetch_psoc_frequency(hdev); + + rc = goya_mmu_clear_pgt_range(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to clear MMU page tables range %d\n", rc); + return rc; + } + + rc = goya_mmu_set_dram_default_page(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc); + return rc; + } + + rc = goya_mmu_add_mappings_for_device_cpu(hdev); + if (rc) + return rc; + + rc = goya_init_cpu_queues(hdev); + if (rc) + return rc; + + rc = goya_test_cpu_queue(hdev); + if (rc) + return rc; + rc = goya_armcp_info_get(hdev); if (rc) { - dev_err(hdev->dev, "Failed to get armcp info\n"); + dev_err(hdev->dev, "Failed to get armcp info %d\n", rc); return rc; } @@ -553,33 +611,15 @@ int goya_late_init(struct hl_device *hdev) rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { - dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + dev_err(hdev->dev, + "Failed to enable PCI access from CPU %d\n", rc); return rc; } WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GOYA_ASYNC_EVENT_ID_INTS_REGISTER); - goya_fetch_psoc_frequency(hdev); - - rc = goya_mmu_clear_pgt_range(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); - goto disable_pci_access; - } - - rc = goya_mmu_set_dram_default_page(hdev); - if (rc) { - dev_err(hdev->dev, "Failed to set DRAM default page\n"); - goto disable_pci_access; - } - return 0; - -disable_pci_access: - hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); - - return rc; } /* @@ -655,7 +695,10 @@ static int goya_sw_init(struct hl_device *hdev) goto free_dma_pool; } - hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1); + dev_dbg(hdev->dev, "cpu accessible memory at bus address 0x%llx\n", + hdev->cpu_accessible_dma_address); + + hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); if (!hdev->cpu_accessible_dma_pool) { dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n"); @@ -786,7 +829,6 @@ static void goya_init_dma_ch(struct hl_device *hdev, int dma_id) else sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007; - WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr)); WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr)); WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001); } @@ -973,9 +1015,9 @@ int goya_init_cpu_queues(struct hl_device *hdev) WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address)); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, - lower_32_bits(hdev->cpu_accessible_dma_address)); + lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, - upper_32_bits(hdev->cpu_accessible_dma_address)); + upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR)); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES); WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES); @@ -1001,7 +1043,7 @@ int goya_init_cpu_queues(struct hl_device *hdev) if (err) { dev_err(hdev->dev, - "Failed to communicate with ARM CPU (ArmCP timeout)\n"); + "Failed to setup communication with device CPU\n"); return -EIO; } @@ -2061,10 +2103,12 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset) goya_disable_external_queues(hdev); goya_disable_internal_queues(hdev); - if (hard_reset) + if (hard_reset) { goya_disable_msix(hdev); - else + goya_mmu_remove_device_cpu_mappings(hdev); + } else { goya_sync_irqs(hdev); + } } /* @@ -2277,14 +2321,14 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout) goya_read_device_fw_version(hdev, FW_COMP_UBOOT); goya_read_device_fw_version(hdev, FW_COMP_PREBOOT); - if (status == CPU_BOOT_STATUS_SRAM_AVAIL) - goto out; - if (!hdev->fw_loading) { dev_info(hdev->dev, "Skip loading FW\n"); goto out; } + if (status == CPU_BOOT_STATUS_SRAM_AVAIL) + goto out; + rc = goya_push_linux_to_device(hdev); if (rc) return rc; @@ -2466,34 +2510,11 @@ static int goya_hw_init(struct hl_device *hdev) if (rc) goto disable_queues; - rc = goya_init_cpu_queues(hdev); - if (rc) { - dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", - rc); - goto disable_msix; - } - - /* - * Check if we managed to set the DMA mask to more then 32 bits. If so, - * let's try to increase it again because in Goya we set the initial - * dma mask to less then 39 bits so that the allocation of the memory - * area for the device's cpu will be under 39 bits - */ - if (hdev->dma_mask > 32) { - rc = hl_pci_set_dma_mask(hdev, 48); - if (rc) - goto disable_pci_access; - } - /* Perform read from the device to flush all MSI-X configuration */ val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); return 0; -disable_pci_access: - hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); -disable_msix: - goya_disable_msix(hdev); disable_queues: goya_disable_internal_queues(hdev); goya_disable_external_queues(hdev); @@ -2629,7 +2650,6 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) { u32 db_reg_offset, db_value; - bool invalid_queue = false; switch (hw_queue_id) { case GOYA_QUEUE_ID_DMA_0: @@ -2653,10 +2673,7 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) break; case GOYA_QUEUE_ID_CPU_PQ: - if (hdev->cpu_queues_enable) - db_reg_offset = mmCPU_IF_PF_PQ_PI; - else - invalid_queue = true; + db_reg_offset = mmCPU_IF_PF_PQ_PI; break; case GOYA_QUEUE_ID_MME: @@ -2696,12 +2713,8 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) break; default: - invalid_queue = true; - } - - if (invalid_queue) { /* Should never get here */ - dev_err(hdev->dev, "h/w queue %d is invalid. Can |
