summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/debugfs-driver-habanalabs18
-rw-r--r--Documentation/ABI/testing/sysfs-driver-habanalabs42
-rw-r--r--drivers/misc/habanalabs/asid.c2
-rw-r--r--drivers/misc/habanalabs/command_submission.c10
-rw-r--r--drivers/misc/habanalabs/context.c11
-rw-r--r--drivers/misc/habanalabs/debugfs.c54
-rw-r--r--drivers/misc/habanalabs/device.c189
-rw-r--r--drivers/misc/habanalabs/firmware_if.c51
-rw-r--r--drivers/misc/habanalabs/goya/goya.c635
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h16
-rw-r--r--drivers/misc/habanalabs/goya/goya_security.c16
-rw-r--r--drivers/misc/habanalabs/habanalabs.h93
-rw-r--r--drivers/misc/habanalabs/habanalabs_drv.c66
-rw-r--r--drivers/misc/habanalabs/habanalabs_ioctl.c11
-rw-r--r--drivers/misc/habanalabs/hw_queue.c2
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h418
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h1
-rw-r--r--drivers/misc/habanalabs/memory.c13
-rw-r--r--drivers/misc/habanalabs/mmu.c20
-rw-r--r--drivers/misc/habanalabs/pci.c10
-rw-r--r--drivers/misc/habanalabs/sysfs.c4
-rw-r--r--include/uapi/misc/habanalabs.h30
22 files changed, 1249 insertions, 463 deletions
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 2f5b80be07a3..f0ac14b70ecb 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -3,7 +3,10 @@ Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Sets the device address to be used for read or write through
- PCI bar. The acceptable value is a string that starts with "0x"
+ PCI bar, or the device VA of a host mapped memory to be read or
+ written directly from the host. The latter option is allowed
+ only when the IOMMU is disabled.
+ The acceptable value is a string that starts with "0x"
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
Date: Jan 2019
@@ -33,10 +36,12 @@ Contact: oded.gabbay@gmail.com
Description: Allows the root user to read or write directly through the
device's PCI bar. Writing to this file generates a write
transaction while reading from the file generates a read
- transcation. This custom interface is needed (instead of using
+ transaction. This custom interface is needed (instead of using
the generic Linux user-space PCI mapping) because the DDR bar
is very small compared to the DDR memory and only the driver can
- move the bar before and after the transaction
+ move the bar before and after the transaction.
+ If the IOMMU is disabled, it also allows the root user to read
+ or write from the host a device VA of a host mapped memory
What: /sys/kernel/debug/habanalabs/hl<n>/device
Date: Jan 2019
@@ -46,6 +51,13 @@ Description: Enables the root user to set the device to specific state.
Valid values are "disable", "enable", "suspend", "resume".
User can read this property to see the valid values
+What: /sys/kernel/debug/habanalabs/hl<n>/engines
+Date: Jul 2019
+KernelVersion: 5.3
+Contact: oded.gabbay@gmail.com
+Description: Displays the status registers values of the device engines and
+ their derived idle status
+
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
Date: Jan 2019
KernelVersion: 5.1
diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs
index 78b2bcf316a3..f433fc6db3c6 100644
--- a/Documentation/ABI/testing/sysfs-driver-habanalabs
+++ b/Documentation/ABI/testing/sysfs-driver-habanalabs
@@ -62,18 +62,20 @@ What: /sys/class/habanalabs/hl<n>/ic_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
-Description: Allows the user to set the maximum clock frequency of the
- Interconnect fabric. Writes to this parameter affect the device
- only when the power management profile is set to "manual" mode.
- The device IC clock might be set to lower value then the
+Description: Allows the user to set the maximum clock frequency, in Hz, of
+ the Interconnect fabric. Writes to this parameter affect the
+ device only when the power management profile is set to "manual"
+ mode. The device IC clock might be set to lower value than the
maximum. The user should read the ic_clk_curr to see the actual
- frequency value of the IC
+ frequency value of the IC. This property is valid only for the
+ Goya ASIC family
What: /sys/class/habanalabs/hl<n>/ic_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
-Description: Displays the current clock frequency of the Interconnect fabric
+Description: Displays the current clock frequency, in Hz, of the Interconnect
+ fabric. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/infineon_ver
Date: Jan 2019
@@ -92,18 +94,20 @@ What: /sys/class/habanalabs/hl<n>/mme_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
-Description: Allows the user to set the maximum clock frequency of the
- MME compute engine. Writes to this parameter affect the device
- only when the power management profile is set to "manual" mode.
- The device MME clock might be set to lower value then the
+Description: Allows the user to set the maximum clock frequency, in Hz, of
+ the MME compute engine. Writes to this parameter affect the
+ device only when the power management profile is set to "manual"
+ mode. The device MME clock might be set to lower value than the
maximum. The user should read the mme_clk_curr to see the actual
- frequency value of the MME
+ frequency value of the MME. This property is valid only for the
+ Goya ASIC family
What: /sys/class/habanalabs/hl<n>/mme_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
-Description: Displays the current clock frequency of the MME compute engine
+Description: Displays the current clock frequency, in Hz, of the MME compute
+ engine. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/pci_addr
Date: Jan 2019
@@ -163,18 +167,20 @@ What: /sys/class/habanalabs/hl<n>/tpc_clk
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
-Description: Allows the user to set the maximum clock frequency of the
- TPC compute engines. Writes to this parameter affect the device
- only when the power management profile is set to "manual" mode.
- The device TPC clock might be set to lower value then the
+Description: Allows the user to set the maximum clock frequency, in Hz, of
+ the TPC compute engines. Writes to this parameter affect the
+ device only when the power management profile is set to "manual"
+ mode. The device TPC clock might be set to lower value than the
maximum. The user should read the tpc_clk_curr to see the actual
- frequency value of the TPC
+ frequency value of the TPC. This property is valid only for
+ Goya ASIC family
What: /sys/class/habanalabs/hl<n>/tpc_clk_curr
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
-Description: Displays the current clock frequency of the TPC compute engines
+Description: Displays the current clock frequency, in Hz, of the TPC compute
+ engines. This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/uboot_ver
Date: Jan 2019
diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c
index f54e7971a762..2c01461701a3 100644
--- a/drivers/misc/habanalabs/asid.c
+++ b/drivers/misc/habanalabs/asid.c
@@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)
mutex_init(&hdev->asid_mutex);
- /* ASID 0 is reserved for KMD */
+ /* ASID 0 is reserved for KMD and device CPU */
set_bit(0, hdev->asid_bitmap);
return 0;
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 6fe785e26859..6ad83d5ef4b0 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -682,14 +682,12 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
u32 tmp;
rc = hl_poll_timeout_memory(hdev,
- (u64) (uintptr_t) &ctx->thread_ctx_switch_wait_token,
- jiffies_to_usecs(hdev->timeout_jiffies),
- &tmp);
+ &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
+ 100, jiffies_to_usecs(hdev->timeout_jiffies));
- if (rc || !tmp) {
+ if (rc == -ETIMEDOUT) {
dev_err(hdev->dev,
- "context switch phase didn't finish in time\n");
- rc = -ETIMEDOUT;
+ "context switch phase timeout (%d)\n", tmp);
goto out;
}
}
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index f4c92f110a72..8682590e3f6e 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -31,9 +31,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
* Coresight might be still working by accessing addresses
* related to the stopped engines. Hence stop it explicitly.
*/
- hdev->asic_funcs->halt_coresight(hdev);
+ if (hdev->in_debug)
+ hl_device_set_debug_mode(hdev, false);
+
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
+ } else {
+ hl_mmu_ctx_fini(ctx);
}
}
@@ -117,6 +121,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
+ rc = hl_mmu_ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+ goto mem_ctx_err;
+ }
} else {
ctx->asid = hl_asid_alloc(hdev);
if (!ctx->asid) {
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index ba418aaa404c..18e499c900c7 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -355,7 +355,7 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
- struct hl_ctx *ctx = hdev->user_ctx;
+ struct hl_ctx *ctx;
u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -367,6 +367,11 @@ static int mmu_show(struct seq_file *s, void *data)
if (!hdev->mmu_enable)
return 0;
+ if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
+ ctx = hdev->kernel_ctx;
+ else
+ ctx = hdev->user_ctx;
+
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return 0;
@@ -495,6 +500,36 @@ err:
return -EINVAL;
}
+static int engines_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+
+ hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+
+ return 0;
+}
+
+static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ if (!hdev->mmu_enable)
+ goto out;
+
+ if (hdev->dram_supports_virtual_memory &&
+ addr >= prop->va_space_dram_start_address &&
+ addr < prop->va_space_dram_end_address)
+ return true;
+
+ if (addr >= prop->va_space_host_start_address &&
+ addr < prop->va_space_host_end_address)
+ return true;
+out:
+ return false;
+}
+
static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
@@ -568,7 +603,6 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
char tmp_buf[32];
u64 addr = entry->addr;
u32 val;
@@ -577,11 +611,8 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
if (*ppos)
return 0;
- if (addr >= prop->va_space_dram_start_address &&
- addr < prop->va_space_dram_end_address &&
- hdev->mmu_enable &&
- hdev->dram_supports_virtual_memory) {
- rc = device_va_to_pa(hdev, entry->addr, &addr);
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
@@ -602,7 +633,6 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 addr = entry->addr;
u32 value;
ssize_t rc;
@@ -611,11 +641,8 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
if (rc)
return rc;
- if (addr >= prop->va_space_dram_start_address &&
- addr < prop->va_space_dram_end_address &&
- hdev->mmu_enable &&
- hdev->dram_supports_virtual_memory) {
- rc = device_va_to_pa(hdev, entry->addr, &addr);
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
if (rc)
return rc;
}
@@ -877,6 +904,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
{"userptr", userptr_show, NULL},
{"vm", vm_show, NULL},
{"mmu", mmu_show, mmu_write},
+ {"engines", engines_show, NULL}
};
static int hl_debugfs_open(struct inode *inode, struct file *file)
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 0b19d3eefb98..0c4894dd9c02 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev)
mutex_init(&hdev->fd_open_cnt_lock);
mutex_init(&hdev->send_cpu_message_lock);
+ mutex_init(&hdev->debug_lock);
mutex_init(&hdev->mmu_cache_lock);
INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
spin_lock_init(&hdev->hw_queues_mirror_lock);
@@ -262,6 +263,7 @@ early_fini:
static void device_early_fini(struct hl_device *hdev)
{
mutex_destroy(&hdev->mmu_cache_lock);
+ mutex_destroy(&hdev->debug_lock);
mutex_destroy(&hdev->send_cpu_message_lock);
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
@@ -324,7 +326,15 @@ static int device_late_init(struct hl_device *hdev)
{
int rc;
- INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
+ if (hdev->asic_funcs->late_init) {
+ rc = hdev->asic_funcs->late_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed late initialization for the H/W\n");
+ return rc;
+ }
+ }
+
hdev->high_pll = hdev->asic_prop.high_pll;
/* force setting to low frequency */
@@ -335,17 +345,9 @@ static int device_late_init(struct hl_device *hdev)
else
hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
- if (hdev->asic_funcs->late_init) {
- rc = hdev->asic_funcs->late_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "failed late initialization for the H/W\n");
- return rc;
- }
- }
-
+ INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
schedule_delayed_work(&hdev->work_freq,
- usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
if (hdev->heartbeat) {
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
@@ -420,6 +422,52 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
return 1;
}
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+{
+ int rc = 0;
+
+ mutex_lock(&hdev->debug_lock);
+
+ if (!enable) {
+ if (!hdev->in_debug) {
+ dev_err(hdev->dev,
+ "Failed to disable debug mode because device was not in debug mode\n");
+ rc = -EFAULT;
+ goto out;
+ }
+
+ hdev->asic_funcs->halt_coresight(hdev);
+ hdev->in_debug = 0;
+
+ goto out;
+ }
+
+ if (hdev->in_debug) {
+ dev_err(hdev->dev,
+ "Failed to enable debug mode because device is already in debug mode\n");
+ rc = -EFAULT;
+ goto out;
+ }
+
+ mutex_lock(&hdev->fd_open_cnt_lock);
+
+ if (atomic_read(&hdev->fd_open_cnt) > 1) {
+ dev_err(hdev->dev,
+ "Failed to enable debug mode. More then a single user is using the device\n");
+ rc = -EPERM;
+ goto unlock_fd_open_lock;
+ }
+
+ hdev->in_debug = 1;
+
+unlock_fd_open_lock:
+ mutex_unlock(&hdev->fd_open_cnt_lock);
+out:
+ mutex_unlock(&hdev->debug_lock);
+
+ return rc;
+}
+
/*
* hl_device_suspend - initiate device suspend
*
@@ -647,13 +695,6 @@ again:
hdev->hard_reset_pending = true;
- if (!hdev->pdev) {
- dev_err(hdev->dev,
- "Reset action is NOT supported in simulator\n");
- rc = -EINVAL;
- goto out_err;
- }
-
device_reset_work = kzalloc(sizeof(*device_reset_work),
GFP_ATOMIC);
if (!device_reset_work) {
@@ -704,6 +745,7 @@ again:
if (hard_reset) {
hl_vm_fini(hdev);
+ hl_mmu_fini(hdev);
hl_eq_reset(hdev, &hdev->event_queue);
}
@@ -731,6 +773,13 @@ again:
goto out_err;
}
+ rc = hl_mmu_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to initialize MMU S/W after hard reset\n");
+ goto out_err;
+ }
+
/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
GFP_KERNEL);
@@ -902,11 +951,18 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto cq_fini;
}
+ /* MMU S/W must be initialized before kernel context is created */
+ rc = hl_mmu_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
+ goto eq_fini;
+ }
+
/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) {
rc = -ENOMEM;
- goto eq_fini;
+ goto mmu_fini;
}
hdev->user_ctx = NULL;
@@ -954,8 +1010,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto out_disabled;
}
- /* After test_queues, KMD can start sending messages to device CPU */
-
rc = device_late_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed late initialization\n");
@@ -1001,6 +1055,8 @@ release_ctx:
"kernel ctx is still alive on initialization failure\n");
free_ctx:
kfree(hdev->kernel_ctx);
+mmu_fini:
+ hl_mmu_fini(hdev);
eq_fini:
hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
@@ -1105,6 +1161,8 @@ void hl_device_fini(struct hl_device *hdev)
hl_vm_fini(hdev);
+ hl_mmu_fini(hdev);
+
hl_eq_fini(hdev, &hdev->event_queue);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
@@ -1126,95 +1184,6 @@ void hl_device_fini(struct hl_device *hdev)
}
/*
- * hl_poll_timeout_memory - Periodically poll a host memory address
- * until it is not zero or a timeout occurs
- * @hdev: pointer to habanalabs device structure
- * @addr: Address to poll
- * @timeout_us: timeout in us
- * @val: Variable to read the value into
- *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
- * The function sleeps for 100us with timeout value of
- * timeout_us
- */
-int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
- u32 timeout_us, u32 *val)
-{
- /*
- * address in this function points always to a memory location in the
- * host's (server's) memory. That location is updated asynchronously
- * either by the direct access of the device or by another core
- */
- u32 *paddr = (u32 *) (uintptr_t) addr;
- ktime_t timeout;
-
- /* timeout should be longer when working with simulator */
- if (!hdev->pdev)
- timeout_us *= 10;
-
- timeout = ktime_add_us(ktime_get(), timeout_us);
-
- might_sleep();
-
- for (;;) {
- /*
- * Flush CPU read/write buffers to make sure we read updates
- * done by other cores or by the device
- */
- mb();
- *val = *paddr;
- if (*val)
- break;
- if (ktime_compare(ktime_get(), timeout) > 0) {
- *val = *paddr;
- break;
- }
- usleep_range((100 >> 2) + 1, 100);
- }
-
- return *val ? 0 : -ETIMEDOUT;
-}
-
-/*
- * hl_poll_timeout_devicememory - Periodically poll a device memory address
- * until it is not zero or a timeout occurs
- * @hdev: pointer to habanalabs device structure
- * @addr: Device address to poll
- * @timeout_us: timeout in us
- * @val: Variable to read the value into
- *
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
- * The function sleeps for 100us with timeout value of
- * timeout_us
- */
-int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
- u32 timeout_us, u32 *val)
-{
- ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
-
- might_sleep();
-
- for (;;) {
- *val = readl(addr);
- if (*val)
- break;
- if (ktime_compare(ktime_get(), timeout) > 0) {
- *val = readl(addr);
- break;
- }
- usleep_range((100 >> 2) + 1, 100);
- }
-
- return *val ? 0 : -ETIMEDOUT;
-}
-
-/*
* MMIO register access helper functions.
*/
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
index eda5d7fcb79f..cc8168bacb24 100644
--- a/drivers/misc/habanalabs/firmware_if.c
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -29,13 +29,13 @@ int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
rc = request_firmware(&fw, fw_name, hdev->dev);
if (rc) {
- dev_err(hdev->dev, "Failed to request %s\n", fw_name);
+ dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
goto out;
}
fw_size = fw->size;
if ((fw_size % 4) != 0) {
- dev_err(hdev->dev, "illegal %s firmware size %zu\n",
+ dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
fw_name, fw_size);
rc = -EINVAL;
goto out;
@@ -85,12 +85,6 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u32 tmp;
int rc = 0;
- if (len > HL_CPU_CB_SIZE) {
- dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
- len);
- return -ENOMEM;
- }
-
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
&pkt_dma_addr);
if (!pkt) {
@@ -117,33 +111,28 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
goto out;
}
- rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
- timeout, &tmp);
+ rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
+ (tmp == ARMCP_PACKET_FENCE_VAL), 1000, timeout);
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev, "Timeout while waiting for device CPU\n");
+ dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
hdev->device_cpu_disabled = true;
goto out;
}
- if (tmp == ARMCP_PACKET_FENCE_VAL) {
- u32 ctl = le32_to_cpu(pkt->ctl);
+ tmp = le32_to_cpu(pkt->ctl);
- rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
- if (rc) {
- dev_err(hdev->dev,
- "F/W ERROR %d for CPU packet %d\n",
- rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK)
+ rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
+ if (rc) {
+ dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
+ rc,
+ (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
>> ARMCP_PKT_CTL_OPCODE_SHIFT);
- rc = -EINVAL;
- } else if (result) {
- *result = (long) le64_to_cpu(pkt->result);
- }
- } else {
- dev_err(hdev->dev, "CPU packet wrong fence value\n");
- rc = -EINVAL;
+ rc = -EIO;
+ } else if (result) {
+ *result = (long) le64_to_cpu(pkt->result);
}
out:
@@ -186,9 +175,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
{
u64 kernel_addr;
- /* roundup to HL_CPU_PKT_SIZE */
- size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
-
kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
*dma_handle = hdev->cpu_accessible_dma_address +
@@ -200,9 +186,6 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr)
{
- /* roundup to HL_CPU_PKT_SIZE */
- size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
-
gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
size);
}
@@ -256,7 +239,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)
HL_ARMCP_INFO_TIMEOUT_USEC, &result);
if (rc) {
dev_err(hdev->dev,
- "Failed to send armcp info pkt, error %d\n", rc);
+ "Failed to send ArmCP info pkt, error %d\n", rc);
goto out;
}
@@ -291,7 +274,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
max_size, &eeprom_info_dma_addr);
if (!eeprom_info_cpu_addr) {
dev_err(hdev->dev,
- "Failed to allocate DMA memory for EEPROM info packet\n");
+ "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
return -ENOMEM;
}
@@ -307,7 +290,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
if (rc) {
dev_err(hdev->dev,
- "Failed to send armcp EEPROM pkt, error %d\n", rc);
+ "Failed to send ArmCP EEPROM packet, error %d\n", rc);
goto out;
}
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 02d116b01a1a..75294ec65257 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -14,6 +14,8 @@
#include <linux/genalloc.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/iommu.h>
+#include <linux/seq_file.h>
/*
* GOYA security scheme:
@@ -89,6 +91,30 @@
#define GOYA_CB_POOL_CB_CNT 512
#define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */
+#define IS_QM_IDLE(engine, qm_glbl_sts0) \
+ (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
+#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
+#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
+#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)
+
+#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
+ (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
+ engine##_CMDQ_IDLE_MASK)
+#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
+#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
+
+#define IS_DMA_IDLE(dma_core_sts0) \
+ !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
+
+#define IS_TPC_IDLE(tpc_cfg_sts) \
+ (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
+
+#define IS_MME_IDLE(mme_arch_sts) \
+ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
+
+
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
"goya cq 4", "goya cpu eq"
@@ -297,6 +323,11 @@ static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
+static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
+
void goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -467,7 +498,7 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
- rc = hl_pci_init(hdev, 39);
+ rc = hl_pci_init(hdev, 48);
if (rc)
return rc;
@@ -539,9 +570,36 @@ int goya_late_init(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
+ goya_fetch_psoc_frequency(hdev);
+
+ rc = goya_mmu_clear_pgt_range(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to clear MMU page tables range %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_mmu_set_dram_default_page(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
+ return rc;
+ }
+
+ rc = goya_mmu_add_mappings_for_device_cpu(hdev);
+ if (rc)
+ return rc;
+
+ rc = goya_init_cpu_queues(hdev);
+ if (rc)
+ return rc;
+
+ rc = goya_test_cpu_queue(hdev);
+ if (rc)
+ return rc;
+
rc = goya_armcp_info_get(hdev);
if (rc) {
- dev_err(hdev->dev, "Failed to get armcp info\n");
+ dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
return rc;
}
@@ -553,33 +611,15 @@ int goya_late_init(struct hl_device *hdev)
rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
- dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+ dev_err(hdev->dev,
+ "Failed to enable PCI access from CPU %d\n", rc);
return rc;
}
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
- goya_fetch_psoc_frequency(hdev);
-
- rc = goya_mmu_clear_pgt_range(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
- goto disable_pci_access;
- }
-
- rc = goya_mmu_set_dram_default_page(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to set DRAM default page\n");
- goto disable_pci_access;
- }
-
return 0;
-
-disable_pci_access:
- hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
-
- return rc;
}
/*<