From d95f87d29cf2097485765baac5109cecb486b5ee Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 11 Apr 2023 10:08:50 -0400 Subject: accel/habanalabs: remove variable gaudi_irq_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc with W=1 reports drivers/accel/habanalabs/gaudi/gaudi.c:117:19: error: ‘gaudi_irq_name’ defined but not used [-Werror=unused-const-variable=] 117 | static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { | ^~~~~~~~~~~~~~ This variable is not used so remove it. Signed-off-by: Tom Rix Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi/gaudi.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c index a29aa8f7b6f3..a1697581c218 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -114,13 +114,6 @@ static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = { GAUDI_QUEUE_ID_DMA_1_3 }; -static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { - "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", - "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", - "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3", - "gaudi cpu eq" -}; - static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, -- cgit v1.2.3 From 1464fbd8bab958b771af3214bd5cb52cd5f612ed Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Thu, 30 Mar 2023 13:38:19 +0300 Subject: accel/habanalabs: ignore false positive razwi In Gaudi2 asic, PSOC RAZWI may cause in HBW or LBW. The address that caused the error is read from HW register and printed by the Driver. There are cases where the Driver receives an indication on PSOC RAZWI error but the address value is zero. In that case, the indication is a false positive. The Driver should not "count" a PSOC RAZWI event error when the caused the address is zeroed. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 43 ++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index b778cf764a68..6f05aa230376 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -8251,6 +8251,7 @@ static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_ u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR]; char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE]; bool razwi_happened = false; + u64 addr; int i; num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info), @@ -8269,43 +8270,53 @@ static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_ if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) { addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI); addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO); - dev_err(hdev->dev, + addr = ((u64)addr_hi << 32) + addr_lo; + if (addr) { + dev_err(hdev->dev, "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n", - eng_name_str, ((u64)addr_hi << 32) + addr_lo); - hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0], + eng_name_str, addr); + hl_handle_razwi(hdev, addr, &eng_id[0], num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask); - razwi_happened = true; + razwi_happened = true; + } } if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) { addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI); addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO); - dev_err(hdev->dev, + addr = ((u64)addr_hi << 32) + addr_lo; + if (addr) { + dev_err(hdev->dev, "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n", - eng_name_str, ((u64)addr_hi << 32) + addr_lo); - hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0], + eng_name_str, addr); + hl_handle_razwi(hdev, addr, &eng_id[0], num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask); - razwi_happened = true; + razwi_happened = true; + } } if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) { addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR); - dev_err(hdev->dev, + if (addr_lo) { + dev_err(hdev->dev, "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n", eng_name_str, addr_lo); - hl_handle_razwi(hdev, addr_lo, &eng_id[0], + hl_handle_razwi(hdev, addr_lo, &eng_id[0], num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask); - razwi_happened = true; + razwi_happened = true; + } } if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) { addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR); - dev_err(hdev->dev, - "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n", - eng_name_str, addr_lo); - hl_handle_razwi(hdev, addr_lo, &eng_id[0], + if (addr_lo) { + dev_err(hdev->dev, + "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n", + eng_name_str, addr_lo); + hl_handle_razwi(hdev, addr_lo, &eng_id[0], num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask); - razwi_happened = true; + razwi_happened = true; + } } /* In common case the loop will break, when there is only one engine id, or * several engines with the same router. The exceptional case is with psoc razwi -- cgit v1.2.3 From 9ce36082c1723d96603c667436a88866b3fde531 Mon Sep 17 00:00:00 2001 From: Rakesh Ughreja Date: Wed, 5 Apr 2023 09:12:29 +0300 Subject: accel/habanalabs: allow user to modify EDMA RL register EDMA transpose workload requires to signal for every activation. User FW sends all the dummy signals to RD_LBW_RATE_LIM_CFG, to save lbw bandwidth. We need the user to be able to access that register to configure it. Signed-off-by: Rakesh Ughreja Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2_security.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index 694735f9e6e6..acd33130e7f9 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -479,6 +479,7 @@ static const u32 gaudi2_pb_dcr0_edma0_unsecured_regs[] = { mmDCORE0_EDMA0_CORE_CTX_TE_NUMROWS, mmDCORE0_EDMA0_CORE_CTX_IDX, mmDCORE0_EDMA0_CORE_CTX_IDX_INC, + mmDCORE0_EDMA0_CORE_RD_LBW_RATE_LIM_CFG, mmDCORE0_EDMA0_QM_CQ_CFG0_0, mmDCORE0_EDMA0_QM_CQ_CFG0_1, mmDCORE0_EDMA0_QM_CQ_CFG0_2, -- cgit v1.2.3 From 574ee40f514453d09ea2c5f3882b3b369b5ba027 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Sun, 9 Apr 2023 14:24:05 +0300 Subject: accel/habanalabs: remove commented code that won't be used Once it was decided that these security settings are to be done by FW rather than by the driver, there's no reason to keep them in the code. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2_security.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index acd33130e7f9..52a12c2bb58e 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -3443,15 +3443,6 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev) ARRAY_SIZE(gaudi2_pb_thermal_sensor0), NULL, HL_PB_NA); } - /* HBM */ - /* Temporarily skip until SW-63348 is solved - * instance_offset = mmHBM1_MC0_BASE - mmHBM0_MC0_BASE; - * rc |= hl_init_pb_with_mask(hdev, HL_PB_SHARED, HL_PB_NA, GAUDI2_HBM_NUM, - * instance_offset, gaudi2_pb_hbm, - * ARRAY_SIZE(gaudi2_pb_hbm), NULL, HL_PB_NA, - * prop->dram_enabled_mask); - */ - /* Scheduler ARCs */ instance_offset = mmARC_FARM_ARC1_AUX_BASE - mmARC_FARM_ARC0_AUX_BASE; rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, -- cgit v1.2.3 From f9b60242af3e2216ed03139bc2c14a02c54073ce Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Mon, 10 Apr 2023 09:31:16 +0300 Subject: accel/habanalabs: fix bug in free scratchpad memory This commit fixes a bug in Gaudi2 when freeing the scratchpad memory in case software init fails. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 6f05aa230376..acc304ebca82 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -3630,8 +3630,8 @@ static int gaudi2_sw_init(struct hl_device *hdev) special_blocks_free: gaudi2_special_blocks_iterator_free(hdev); free_scratchpad_mem: - hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address, - gaudi2->scratchpad_bus_address); + hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address, + gaudi2->scratchpad_bus_address); free_virt_msix_db_mem: hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr); free_cpu_accessible_dma_pool: -- cgit v1.2.3 From 9ef23f05aed4e1dd89b70cd2b8a5e35134ab90bb Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Wed, 22 Mar 2023 08:38:49 +0200 Subject: accel/habanalabs: add helper to extract the FW major/minor the helper is extract_u32_until_given_char and can later be used to also get the major/minor of the sw version. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/firmware_if.c | 69 +++++++++++++++++---------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index 59f61ec66445..1400a4430045 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -71,38 +71,59 @@ free_fw_ver: return NULL; } -static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver) +/** + * extract_u32_until_given_char() - given a string of the format "*", extract the u32. + * @str: the given string + * @ver_num: the pointer to the extracted u32 to be returned to the caller. + * @given_char: the given char at the end of the u32 in the string + * + * Return: Upon success, return a pointer to the given_char in the string. Upon failure, return NULL + */ +static char *extract_u32_until_given_char(char *str, u32 *ver_num, char given_char) { - char major[8], minor[8], *first_dot, *second_dot; - int rc; + char num_str[8] = {}, *ch; - first_dot = strnstr(preboot_ver, ".", 10); - if (first_dot) { - strscpy(major, preboot_ver, first_dot - preboot_ver + 1); - rc = kstrtou32(major, 10, &hdev->fw_major_version); - } else { - rc = -EINVAL; - } + ch = strchrnul(str, given_char); + if (*ch == '\0' || ch == str || ch - str >= sizeof(num_str)) + return NULL; - if (rc) { - dev_err(hdev->dev, "Error %d parsing preboot major version\n", rc); - return rc; + memcpy(num_str, str, ch - str); + if (kstrtou32(num_str, 10, ver_num)) + return NULL; + return ch; +} + +/** + * hl_get_preboot_major_minor() - extract the FW's version major, minor from the version string. + * @hdev: pointer to the hl_device + * @preboot_ver: the FW's version string + * + * preboot_ver is expected to be the format of ..*, e.g: 42.0.1-sec-3 + * The extracted version is set in the hdev fields: fw_inner_{major/minor}_ver. + * + * Return: 0 on success, negative error code for failure. + */ +static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver) +{ + preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_major_version, '.'); + if (!preboot_ver) { + dev_err(hdev->dev, "Error parsing preboot major version\n"); + goto err_zero_ver; } - /* skip the first dot */ - first_dot++; + preboot_ver++; - second_dot = strnstr(first_dot, ".", 10); - if (second_dot) { - strscpy(minor, first_dot, second_dot - first_dot + 1); - rc = kstrtou32(minor, 10, &hdev->fw_minor_version); - } else { - rc = -EINVAL; + preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_minor_version, '.'); + if (!preboot_ver) { + dev_err(hdev->dev, "Error parsing preboot minor version\n"); + goto err_zero_ver; } + return 0; - if (rc) - dev_err(hdev->dev, "Error %d parsing preboot minor version\n", rc); - return rc; +err_zero_ver: + hdev->fw_major_version = 0; + hdev->fw_minor_version = 0; + return -EINVAL; } static int hl_request_fw(struct hl_device *hdev, -- cgit v1.2.3 From 3071247ca06195eed4261b99035e0c031fa5ddd0 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 2 Apr 2023 12:05:43 +0300 Subject: accel/habanalabs: rename fw_{major/minor}_version to fw_inner_{major/minor}_ver We later want to add fields for Firmware SW version. The current extracted FW version is the inner FW versioning so the new name is better and also better differentiate from the FW's SW version. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/firmware_if.c | 8 ++++---- drivers/accel/habanalabs/common/habanalabs.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index 1400a4430045..6150ab6ba810 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -105,7 +105,7 @@ static char *extract_u32_until_given_char(char *str, u32 *ver_num, char given_ch */ static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver) { - preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_major_version, '.'); + preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_inner_major_ver, '.'); if (!preboot_ver) { dev_err(hdev->dev, "Error parsing preboot major version\n"); goto err_zero_ver; @@ -113,7 +113,7 @@ static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver) preboot_ver++; - preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_minor_version, '.'); + preboot_ver = extract_u32_until_given_char(preboot_ver, &hdev->fw_inner_minor_ver, '.'); if (!preboot_ver) { dev_err(hdev->dev, "Error parsing preboot minor version\n"); goto err_zero_ver; @@ -121,8 +121,8 @@ static int hl_get_preboot_major_minor(struct hl_device *hdev, char *preboot_ver) return 0; err_zero_ver: - hdev->fw_major_version = 0; - hdev->fw_minor_version = 0; + hdev->fw_inner_major_ver = 0; + hdev->fw_inner_minor_ver = 0; return -EINVAL; } diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index eaae69a9f817..57661cb51621 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3225,8 +3225,8 @@ struct hl_reset_info { * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. * @stream_master_qid_arr: pointer to array with QIDs of master streams. - * @fw_major_version: major version of current loaded preboot. - * @fw_minor_version: minor version of current loaded preboot. + * @fw_inner_major_ver: the major of current loaded preboot inner version. + * @fw_inner_minor_ver: the minor of current loaded preboot inner version. * @dram_used_mem: current DRAM memory consumption. * @memory_scrub_val: the value to which the dram will be scrubbed to using cb scrub_device_dram * @timeout_jiffies: device CS timeout value. @@ -3412,8 +3412,8 @@ struct hl_device { struct hl_reset_info reset_info; u32 *stream_master_qid_arr; - u32 fw_major_version; - u32 fw_minor_version; + u32 fw_inner_major_ver; + u32 fw_inner_minor_ver; atomic64_t dram_used_mem; u64 memory_scrub_val; u64 timeout_jiffies; @@ -3549,7 +3549,7 @@ struct hl_ioctl_desc { static inline bool hl_is_fw_ver_below_1_9(struct hl_device *hdev) { - return (hdev->fw_major_version < 42); + return (hdev->fw_inner_major_ver < 42); } /* -- cgit v1.2.3 From dd5667ff6f9c52c6a49aa9725b80542793613036 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 16 Mar 2023 10:20:44 +0200 Subject: accel/habanalabs: extract and save the FW's SW major/minor/sub-minor It is not always possible to know the FW's SW version from the inner FW version. Therefore we should extract the general SW version in addition to the FW version and use it in functions like 'hl_is_fw_ver_below_1_9' etc. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/firmware_if.c | 78 ++++++++++++++++++++++++--- drivers/accel/habanalabs/common/habanalabs.h | 6 +++ 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index 6150ab6ba810..62052cfe694f 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -93,6 +93,71 @@ static char *extract_u32_until_given_char(char *str, u32 *ver_num, char given_ch return ch; } +/** + * hl_get_sw_major_minor_subminor() - extract the FW's SW version major, minor, sub-minor + * from the version string + * @hdev: pointer to the hl_device + * @fw_str: the FW's version string + * + * The extracted version is set in the hdev fields: fw_sw_{major/minor/sub_minor}_ver. + * + * fw_str is expected to have one of two possible formats, examples: + * 1) 'Preboot version hl-gaudi2-1.9.0-fw-42.0.1-sec-3' + * 2) 'Preboot version hl-gaudi2-1.9.0-rc-fw-42.0.1-sec-3' + * In those examples, the SW major,minor,subminor are correspondingly: 1,9,0. + * + * Return: 0 for success or a negative error code for failure. + */ +static int hl_get_sw_major_minor_subminor(struct hl_device *hdev, const char *fw_str) +{ + char *end, *start; + + end = strnstr(fw_str, "-rc-", VERSION_MAX_LEN); + if (end == fw_str) + return -EINVAL; + + if (!end) + end = strnstr(fw_str, "-fw-", VERSION_MAX_LEN); + + if (end == fw_str) + return -EINVAL; + + if (!end) + return -EINVAL; + + for (start = end - 1; start != fw_str; start--) { + if (*start == '-') + break; + } + + if (start == fw_str) + return -EINVAL; + + /* start/end point each to the starting and ending hyphen of the sw version e.g. -1.9.0- */ + start++; + start = extract_u32_until_given_char(start, &hdev->fw_sw_major_ver, '.'); + if (!start) + goto err_zero_ver; + + start++; + start = extract_u32_until_given_char(start, &hdev->fw_sw_minor_ver, '.'); + if (!start) + goto err_zero_ver; + + start++; + start = extract_u32_until_given_char(start, &hdev->fw_sw_sub_minor_ver, '-'); + if (!start) + goto err_zero_ver; + + return 0; + +err_zero_ver: + hdev->fw_sw_major_ver = 0; + hdev->fw_sw_minor_ver = 0; + hdev->fw_sw_sub_minor_ver = 0; + return -EINVAL; +} + /** * hl_get_preboot_major_minor() - extract the FW's version major, minor from the version string. * @hdev: pointer to the hl_device @@ -2172,6 +2237,7 @@ static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, struct asic_fixed_properties *prop = &hdev->asic_prop; char *preboot_ver, *boot_ver; char btl_ver[32]; + int rc; switch (fwc) { case FW_COMP_BOOT_FIT: @@ -2185,20 +2251,20 @@ static int hl_fw_dynamic_read_device_fw_version(struct hl_device *hdev, break; case FW_COMP_PREBOOT: strscpy(prop->preboot_ver, fw_version, VERSION_MAX_LEN); - preboot_ver = strnstr(prop->preboot_ver, "Preboot", - VERSION_MAX_LEN); + preboot_ver = strnstr(prop->preboot_ver, "Preboot", VERSION_MAX_LEN); + dev_info(hdev->dev, "preboot full version: '%s'\n", preboot_ver); + if (preboot_ver && preboot_ver != prop->preboot_ver) { strscpy(btl_ver, prop->preboot_ver, min((int) (preboot_ver - prop->preboot_ver), 31)); dev_info(hdev->dev, "%s\n", btl_ver); } + rc = hl_get_sw_major_minor_subminor(hdev, preboot_ver); + if (rc) + return rc; preboot_ver = extract_fw_ver_from_str(prop->preboot_ver); if (preboot_ver) { - int rc; - - dev_info(hdev->dev, "preboot version %s\n", preboot_ver); - rc = hl_get_preboot_major_minor(hdev, preboot_ver); kfree(preboot_ver); if (rc) diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 57661cb51621..d6f454b1cde4 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3227,6 +3227,9 @@ struct hl_reset_info { * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @fw_inner_major_ver: the major of current loaded preboot inner version. * @fw_inner_minor_ver: the minor of current loaded preboot inner version. + * @fw_sw_major_ver: the major of current loaded preboot SW version. + * @fw_sw_minor_ver: the minor of current loaded preboot SW version. + * @fw_sw_sub_minor_ver: the sub-minor of current loaded preboot SW version. * @dram_used_mem: current DRAM memory consumption. * @memory_scrub_val: the value to which the dram will be scrubbed to using cb scrub_device_dram * @timeout_jiffies: device CS timeout value. @@ -3414,6 +3417,9 @@ struct hl_device { u32 *stream_master_qid_arr; u32 fw_inner_major_ver; u32 fw_inner_minor_ver; + u32 fw_sw_major_ver; + u32 fw_sw_minor_ver; + u32 fw_sw_sub_minor_ver; atomic64_t dram_used_mem; u64 memory_scrub_val; u64 timeout_jiffies; -- cgit v1.2.3 From a12428acf823a1a367d38a4113b7365e3e5603bf Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Tue, 18 Apr 2023 11:39:44 +0300 Subject: accel/habanalabs: check fw version using sw version The fw inner version is less trustable, instead use the fw general sw release version. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/habanalabs.h | 10 ++++++++-- drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index d6f454b1cde4..02620654ccdf 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3553,9 +3553,15 @@ struct hl_ioctl_desc { hl_ioctl_t *func; }; -static inline bool hl_is_fw_ver_below_1_9(struct hl_device *hdev) +static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major, u32 fw_sw_minor) { - return (hdev->fw_inner_major_ver < 42); + if (hdev->fw_sw_major_ver < fw_sw_major) + return true; + if (hdev->fw_sw_major_ver > fw_sw_major) + return false; + if (hdev->fw_sw_minor_ver < fw_sw_minor) + return true; + return false; } /* diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index acc304ebca82..6e3aa89b19c2 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -8043,7 +8043,7 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, case RAZWI_TPC: hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx]; - if (hl_is_fw_ver_below_1_9(hdev) && + if (hl_is_fw_sw_ver_below(hdev, 1, 9) && !hdev->asic_prop.fw_security_enabled && ((module_idx == 0) || (module_idx == 1))) lbw_rtr_id = DCORE0_RTR0; -- cgit v1.2.3 From cc7b790d412461520de49eb321a0aeed2735e5c4 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Wed, 8 Feb 2023 16:16:08 +0200 Subject: accel/habanalabs: do soft-reset using cpucp packet This is done depending on the FW version. The cpucp method is preferable and saves scratchpads resource. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/firmware_if.c | 14 ++++++++++++ drivers/accel/habanalabs/common/habanalabs.h | 1 + drivers/accel/habanalabs/gaudi2/gaudi2.c | 26 +++++++++++++--------- drivers/accel/habanalabs/include/common/cpucp_if.h | 4 ++++ 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index 62052cfe694f..e48f024d8649 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -591,6 +591,20 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, size); } +int hl_fw_send_soft_reset(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_SOFT_RESET << CPUCP_PKT_CTL_OPCODE_SHIFT); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc) + dev_err(hdev->dev, "failed to send soft-reset msg (err = %d)\n", rc); + + return rc; +} + int hl_fw_send_device_activity(struct hl_device *hdev, bool open) { struct cpucp_packet pkt; diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 02620654ccdf..6b102947cb90 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3884,6 +3884,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev, int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num); int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid); int hl_fw_send_device_activity(struct hl_device *hdev, bool open); +int hl_fw_send_soft_reset(struct hl_device *hdev); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 6e3aa89b19c2..5c80e7af5b78 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -6148,18 +6148,24 @@ static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_perform u32 poll_timeout_us) { struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; + int rc = 0; if (!driver_performs_reset) { - /* set SP to indicate reset request sent to FW */ - if (dyn_regs->cpu_rst_status) - WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); - else - WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); - - WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), - gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); - - return gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); + if (hl_is_fw_sw_ver_below(hdev, 1, 10)) { + /* set SP to indicate reset request sent to FW */ + if (dyn_regs->cpu_rst_status) + WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); + else + WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); + WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), + gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); + + /* wait for f/w response */ + rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us); + } else { + rc = hl_fw_send_soft_reset(hdev); + } + return rc; } /* Block access to engines, QMANs and SM during reset, these diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/drivers/accel/habanalabs/include/common/cpucp_if.h index 8bbe685458c4..f68308cc2524 100644 --- a/drivers/accel/habanalabs/include/common/cpucp_if.h +++ b/drivers/accel/habanalabs/include/common/cpucp_if.h @@ -665,6 +665,9 @@ enum pq_init_status { * * CPUCP_PACKET_REGISTER_INTERRUPTS - * Packet to register interrupts indicating LKD is ready to receive events from FW. + * + * CPUCP_PACKET_SOFT_RESET - + * Packet to perform soft-reset. */ enum cpucp_packet_id { @@ -731,6 +734,7 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED11, /* not used */ CPUCP_PACKET_RESERVED12, /* internal */ CPUCP_PACKET_REGISTER_INTERRUPTS, /* internal */ + CPUCP_PACKET_SOFT_RESET, /* internal */ CPUCP_PACKET_ID_MAX /* must be last */ }; -- cgit v1.2.3 From 57469c12060899b7b0768ace178c7e36501e2f21 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Thu, 13 Apr 2023 11:22:06 +0300 Subject: accel/habanalabs: unsecure TPC bias registers User needs to be able to perform downcast / upcast of fp8_143 dtype. Hence bias register needs to be accessed by the user. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2_security.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index 52a12c2bb58e..fadb870ff4c0 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -1542,6 +1542,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = { mmDCORE0_TPC0_CFG_LUT_FUNC128_BASE2_ADDR_HI, mmDCORE0_TPC0_CFG_LUT_FUNC256_BASE2_ADDR_LO, mmDCORE0_TPC0_CFG_LUT_FUNC256_BASE2_ADDR_HI, + mmDCORE0_TPC0_CFG_FP8_143_BIAS, mmDCORE0_TPC0_CFG_ROUND_CSR, mmDCORE0_TPC0_CFG_CONV_ROUND_CSR, mmDCORE0_TPC0_CFG_SEMAPHORE, -- cgit v1.2.3 From 04729b418f8b031712e6dbf5bffa7d639f8f211e Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Thu, 13 Apr 2023 13:54:40 +0300 Subject: accel/habanalabs: call to HW/FW err returns 0 when no events exist This commit modifies the call to retrieve HW or FW error events to return success when no events are pending, as done in the calls to other events. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/habanalabs_ioctl.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index 203ee857810c..4368e6c9a23a 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -842,15 +842,15 @@ static int hw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) struct hw_err_info *info; int rc; - if ((!user_buf_size) || (!user_buf)) + if (!user_buf) return -EINVAL; - if (user_buf_size < sizeof(struct hl_info_hw_err_event)) - return -ENOMEM; - info = &hdev->captured_err_info.hw_err; if (!info->event_info_available) - return -ENOENT; + return 0; + + if (user_buf_size < sizeof(struct hl_info_hw_err_event)) + return -ENOMEM; rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_hw_err_event)); return rc ? -EFAULT : 0; @@ -864,15 +864,15 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) struct fw_err_info *info; int rc; - if ((!user_buf_size) || (!user_buf)) + if (!user_buf) return -EINVAL; - if (user_buf_size < sizeof(struct hl_info_fw_err_event)) - return -ENOMEM; - info = &hdev->captured_err_info.fw_err; if (!info->event_info_available) - return -ENOENT; + return 0; + + if (user_buf_size < sizeof(struct hl_info_fw_err_event)) + return -ENOMEM; rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_fw_err_event)); return rc ? -EFAULT : 0; -- cgit v1.2.3 From ad8bfd3619bb096dd1929a69989120b45e3c3eee Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Mon, 17 Apr 2023 15:14:30 +0300 Subject: accel/habanalabs: minimize encapsulation signal mutex lock time Sync Stream Encapsulated Signal Handlers can be managed from different contexts, and as such they are protected via a spin_lock. However, spin_lock was unnecessarily protecting a larger code section than really needed, covering a sleepable code section as well. Since spin_lock disables preemption, it could lead to sleeping in atomic context. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/command_submission.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index af9d2e22c6e7..977cd2710686 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -2152,7 +2152,7 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) hdev->asic_funcs->hw_queues_unlock(hdev); rc = -EINVAL; - goto out; + goto out_unlock; } /* @@ -2167,15 +2167,21 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) /* Release the id and free allocated memory of the handle */ idr_remove(&mgr->handles, handle_id); + + /* unlock before calling ctx_put, where we might sleep */ + spin_unlock(&mgr->lock); hl_ctx_put(encaps_sig_hdl->ctx); kfree(encaps_sig_hdl); + goto out; } else { rc = -EINVAL; dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); } -out: + +out_unlock: spin_unlock(&mgr->lock); +out: return rc; } -- cgit v1.2.3 From 9a4e44a4ee49dff6ded6a4c76bb921a8379cb7b4 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Sun, 4 Dec 2022 18:37:53 +0200 Subject: accel/habanalabs: refactor abort of completions and waits Aborting CS completions should be in command_submission.c but aborting waiting for user interrupts should be in device.c. This separation is also for adding more abort operations in the future. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/command_submission.c | 3 +-- drivers/accel/habanalabs/common/device.c | 17 ++++++++++++----- drivers/accel/habanalabs/common/habanalabs.h | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index 977cd2710686..4ff3256fdf8b 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -1139,11 +1139,10 @@ static void force_complete_cs(struct hl_device *hdev) spin_unlock(&hdev->cs_mirror_lock); } -void hl_abort_waitings_for_completion(struct hl_device *hdev) +void hl_abort_waiting_for_cs_completions(struct hl_device *hdev) { force_complete_cs(hdev); force_complete_multi_cs(hdev); - hl_release_pending_user_interrupts(hdev); } static void job_wq_completion(struct work_struct *work) diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index fabfc501ef54..c8f4a34d2831 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -1157,6 +1157,16 @@ static void take_release_locks(struct hl_device *hdev) mutex_unlock(&hdev->fpriv_ctrl_list_lock); } +static void hl_abort_waiting_for_completions(struct hl_device *hdev) +{ + hl_abort_waiting_for_cs_completions(hdev); + + /* Release all pending user interrupts, each pending user interrupt + * holds a reference to a user context. + */ + hl_release_pending_user_interrupts(hdev); +} + static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, bool skip_wq_flush) { @@ -1176,10 +1186,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r /* flush the MMU prefetch workqueue */ flush_workqueue(hdev->prefetch_wq); - /* Release all pending user interrupts, each pending user interrupt - * holds a reference to user context - */ - hl_release_pending_user_interrupts(hdev); + hl_abort_waiting_for_completions(hdev); } /* @@ -1921,7 +1928,7 @@ out: hl_ctx_put(ctx); - hl_abort_waitings_for_completion(hdev); + hl_abort_waiting_for_completions(hdev); return 0; diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 6b102947cb90..f83ea96c6530 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3934,7 +3934,7 @@ void hl_dec_fini(struct hl_device *hdev); void hl_dec_ctx_fini(struct hl_ctx *ctx); void hl_release_pending_user_interrupts(struct hl_device *hdev); -void hl_abort_waitings_for_completion(struct hl_device *hdev); +void hl_abort_waiting_for_cs_completions(struct hl_device *hdev); int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig); -- cgit v1.2.3 From 3d21ec6424e6d38f284c37d77e7ec524c1a454f2 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Mon, 17 Apr 2023 21:24:19 +0300 Subject: accel/habanalabs: add missing tpc interrupt info For some reason the last possible tpc interrupt cause in gaudi2_tpc_interrupts_cause is missing from the code. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 5c80e7af5b78..058741698d71 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -63,7 +63,7 @@ #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22 -#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 30 +#define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5 @@ -891,6 +891,7 @@ static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAU "invalid_lock_access", "LD_L protection violation", "ST_L protection violation", + "D$ L0CS mismatch", }; static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = { -- cgit v1.2.3 From d8b9cea584661b30305cf341bf9f675dc0a25471 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 18 Apr 2023 14:48:22 +0300 Subject: accel/habanalabs: add pci health check during heartbeat Currently upon a heartbeat failure, we don't know if the failure is due to firmware hang or due to a bad PCI link. Hence, we are reading a PCI config space register with a known value (vendor ID) so we will know which of the two possibilities caused the heartbeat failure. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/device.c | 15 ++++++++++++++- drivers/accel/habanalabs/common/habanalabs.h | 2 ++ drivers/accel/habanalabs/common/habanalabs_drv.c | 2 -- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index c8f4a34d2831..98b46b2e1898 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -981,6 +981,18 @@ static void device_early_fini(struct hl_device *hdev) hdev->asic_funcs->early_fini(hdev); } +static bool is_pci_link_healthy(struct hl_device *hdev) +{ + u16 vendor_id; + + if (!hdev->pdev) + return false; + + pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id); + + return (vendor_id == PCI_VENDOR_ID_HABANALABS); +} + static void hl_device_heartbeat(struct work_struct *work) { struct hl_device *hdev = container_of(work, struct hl_device, @@ -995,7 +1007,8 @@ static void hl_device_heartbeat(struct work_struct *work) goto reschedule; if (hl_device_operational(hdev, NULL)) - dev_err(hdev->dev, "Device heartbeat failed!\n"); + dev_err(hdev->dev, "Device heartbeat failed! PCI link is %s\n", + is_pci_link_healthy(hdev) ? "healthy" : "broken"); info.err_type = HL_INFO_FW_HEARTBEAT_ERR; info.event_mask = &event_mask; diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index f83ea96c6530..ad412cc01aba 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -36,6 +36,8 @@ struct hl_device; struct hl_fpriv; +#define PCI_VENDOR_ID_HABANALABS 0x1da3 + /* Use upper bits of mmap offset to store habana driver specific information. * bits[63:59] - Encode mmap type * bits[45:0] - mmap offset value diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c index d9df64e75f33..1ec97da3dddb 100644 --- a/drivers/accel/habanalabs/common/habanalabs_drv.c +++ b/drivers/accel/habanalabs/common/habanalabs_drv.c @@ -54,8 +54,6 @@ module_param(boot_error_status_mask, ulong, 0444); MODULE_PARM_DESC(boot_error_status_mask, "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)"); -#define PCI_VENDOR_ID_HABANALABS 0x1da3 - #define PCI_IDS_GOYA 0x0001 #define PCI_IDS_GAUDI 0x1000 #define PCI_IDS_GAUDI_SEC 0x1010 -- cgit v1.2.3 From 3b9abb4fa642ee28bcd22a0398087a2d87d3c987 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Sun, 19 Mar 2023 21:46:44 +0200 Subject: accel/habanalabs: expose debugfs files later Currently the debugfs root folder and files for a device are created at an early step, before the device initialization and before the char device and sysfs files are exposed to user. As there is no real reason not to do it together with the device creation, postpone it to be done right afterwards. The initialization of the debugfs entry structure is left in its current position because it is used before creating the files. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/debugfs.c | 37 ++++++++++------- drivers/accel/habanalabs/common/device.c | 62 +++++++++++++++------------- drivers/accel/habanalabs/common/habanalabs.h | 6 ++- 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index 22dd17c077c0..29b78c7cf5de 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -1756,17 +1756,15 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent } } -void hl_debugfs_add_device(struct hl_device *hdev) +int hl_debugfs_device_init(struct hl_device *hdev) { struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; int count = ARRAY_SIZE(hl_debugfs_list); dev_entry->hdev = hdev; - dev_entry->entry_arr = kmalloc_array(count, - sizeof(struct hl_debugfs_entry), - GFP_KERNEL); + dev_entry->entry_arr = kmalloc_array(count, sizeof(struct hl_debugfs_entry), GFP_KERNEL); if (!dev_entry->entry_arr) - return; + return -ENOMEM; dev_entry->data_dma_blob_desc.size = 0; dev_entry->data_dma_blob_desc.data = NULL; @@ -1787,21 +1785,14 @@ void hl_debugfs_add_device(struct hl_device *hdev) spin_lock_init(&dev_entry->userptr_spinlock); mutex_init(&dev_entry->ctx_mem_hash_mutex); - dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), - hl_debug_root); - - add_files_to_device(hdev, dev_entry, dev_entry->root); - if (!hdev->asic_prop.fw_security_enabled) - add_secured_nodes(dev_entry, dev_entry->root); + return 0; } -void hl_debugfs_remove_device(struct hl_device *hdev) +void hl_debugfs_device_fini(struct hl_device *hdev) { struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; int i; - debugfs_remove_recursive(entry->root); - mutex_destroy(&entry->ctx_mem_hash_mutex); mutex_destroy(&entry->file_mutex); @@ -1814,6 +1805,24 @@ void hl_debugfs_remove_device(struct hl_device *hdev) kfree(entry->entry_arr); } +void hl_debugfs_add_device(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root); + + add_files_to_device(hdev, dev_entry, dev_entry->root); + if (!hdev->asic_prop.fw_security_enabled) + add_secured_nodes(dev_entry, dev_entry->root); +} + +void hl_debugfs_remove_device(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; + + debugfs_remove_recursive(entry->root); +} + void hl_debugfs_add_file(struct hl_fpriv *hpriv) { struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 98b46b2e1898..cab5a63db8c1 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -674,7 +674,7 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class, return 0; } -static int device_cdev_sysfs_add(struct hl_device *hdev) +static int cdev_sysfs_debugfs_add(struct hl_device *hdev) { int rc; @@ -699,7 +699,9 @@ static int device_cdev_sysfs_add(struct hl_device *hdev) goto delete_ctrl_cdev_device; } - hdev->cdev_sysfs_created = true; + hl_debugfs_add_device(hdev); + + hdev->cdev_sysfs_debugfs_created = true; return 0; @@ -710,11 +712,12 @@ delete_cdev_device: return rc; } -static void device_cdev_sysfs_del(struct hl_device *hdev) +static void cdev_sysfs_debugfs_remove(struct hl_device *hdev) { - if (!hdev->cdev_sysfs_created) + if (!hdev->cdev_sysfs_debugfs_created) goto put_devices; + hl_debugfs_remove_device(hdev); hl_sysfs_fini(hdev); cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); cdev_device_del(&hdev->cdev, hdev->dev); @@ -2054,7 +2057,7 @@ out_err: int hl_device_init(struct hl_device *hdev) { int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; - bool add_cdev_sysfs_on_err = false; + bool expose_interfaces_on_err = false; rc = create_cdev(hdev); if (rc) @@ -2170,16 +2173,22 @@ int hl_device_init(struct hl_device *hdev) hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC; hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL; - hl_debugfs_add_device(hdev); - /* debugfs nodes are created in hl_ctx_init so it must be called after - * hl_debugfs_add_device. + rc = hl_debugfs_device_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize debugfs entry structure\n"); + kfree(hdev->kernel_ctx); + goto mmu_fini; + } + + /* The debugfs entry structure is accessed in hl_ctx_init(), so it must be called after + * hl_debugfs_device_init(). */ rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); if (rc) { dev_err(hdev->dev, "failed to initialize kernel context\n"); kfree(hdev->kernel_ctx); - goto remove_device_from_debugfs; + goto debugfs_device_fini; } rc = hl_cb_pool_init(hdev); @@ -2195,11 +2204,10 @@ int hl_device_init(struct hl_device *hdev) } /* - * From this point, override rc (=0) in case of an error to allow - * debugging (by adding char devices and create sysfs nodes as part of - * the error flow). + * From this point, override rc (=0) in case of an error to allow debugging + * (by adding char devices and creating sysfs/debugfs files as part of the error flow). */ - add_cdev_sysfs_on_err = true; + expose_interfaces_on_err = true; /* Device is now enabled as part of the initialization requires * communication with the device firmware to get information that @@ -2241,15 +2249,13 @@ int hl_device_init(struct hl_device *hdev) } /* - * Expose devices and sysfs nodes to user. - * From here there is no need to add char devices and create sysfs nodes - * in case of an error. + * Expose devices and sysfs/debugfs files to user. + * From here there is no need to expose them in case of an error. */ - add_cdev_sysfs_on_err = false; - rc = device_cdev_sysfs_add(hdev); + expose_interfaces_on_err = false; + rc = cdev_sysfs_debugfs_add(hdev); if (rc) { - dev_err(hdev->dev, - "Failed to add char devices and sysfs nodes\n"); + dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n"); rc = 0; goto out_disabled; } @@ -2295,8 +2301,8 @@ release_ctx: if (hl_ctx_put(hdev->kernel_ctx) != 1) dev_err(hdev->dev, "kernel ctx is still alive on initialization failure\n"); -remove_device_from_debugfs: - hl_debugfs_remove_device(hdev); +debugfs_device_fini: + hl_debugfs_device_fini(hdev); mmu_fini: hl_mmu_fini(hdev); eq_fini: @@ -2320,8 +2326,8 @@ free_dev: put_device(hdev->dev); out_disabled: hdev->disabled = true; - if (add_cdev_sysfs_on_err) - device_cdev_sysfs_add(hdev); + if (expose_interfaces_on_err) + cdev_sysfs_debugfs_add(hdev); if (hdev->pdev) dev_err(&hdev->pdev->dev, "Failed to initialize hl%d. Device %s is NOT usable !\n", @@ -2447,8 +2453,6 @@ void hl_device_fini(struct hl_device *hdev) if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) dev_err(hdev->dev, "kernel ctx is still alive\n"); - hl_debugfs_remove_device(hdev); - hl_dec_fini(hdev); hl_vm_fini(hdev); @@ -2473,8 +2477,10 @@ void hl_device_fini(struct hl_device *hdev) device_early_fini(hdev); - /* Hide devices and sysfs nodes from user */ - device_cdev_sysfs_del(hdev); + /* Hide devices and sysfs/debugfs files from user */ + cdev_sysfs_debugfs_remove(hdev); + + hl_debugfs_device_fini(hdev); pr_info("removed device successfully\n"); } diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index ad412cc01aba..ea0914d08bdc 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3292,7 +3292,7 @@ struct hl_reset_info { * @in_debug: whether the device is in a state where the profiling/tracing infrastructure * can be used. This indication is needed because in some ASICs we need to do * specific operations to enable that infrastructure. - * @cdev_sysfs_created: were char devices and sysfs nodes created. + * @cdev_sysfs_debugfs_created: were char devices and sysfs/debugfs files created. * @stop_on_err: true if engines should stop on error. * @supports_sync_stream: is sync stream supported. * @sync_stream_queue_idx: helper index for sync stream queues initialization. @@ -3459,7 +3459,7 @@ struct hl_device { u8 init_done; u8 device_cpu_disabled; u8 in_debug; - u8 cdev_sysfs_created; + u8 cdev_sysfs_debugfs_created; u8 stop_on_err; u8 supports_sync_stream; u8 sync_stream_queue_idx; @@ -3978,6 +3978,8 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info); void hl_debugfs_init(void); void hl_debugfs_fini(void); +int hl_debugfs_device_init(struct hl_device *hdev); +void hl_debugfs_device_fini(struct hl_device *hdev); void hl_debugfs_add_device(struct hl_device *hdev); void hl_debugfs_remove_device(struct hl_device *hdev); void hl_debugfs_add_file(struct hl_fpriv *hpriv); -- cgit v1.2.3 From 67d19a2f49b07b2c4a15e6a96e1de46d95209ae4 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Mon, 27 Mar 2023 11:56:16 +0300 Subject: accel/habanalabs: poll for device status update following WFE cmd Currently, we rely on COMMS protocol's ack to verify that WFE command has been acknowledged by the FW. However, this does not guarantee that the device status has been updated. Although unlikely, this could trigger a race since the driver expects the device to be halted at that stage, but it might not be. Therefore, we increase WFE's robustness by polling on the status register that will be updated once the device is actually halted. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/firmware_if.c | 28 ++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index e48f024d8649..eb51d7f70aec 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -1368,8 +1368,10 @@ void hl_fw_ask_hard_reset_without_linux(struct hl_device *hdev) void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev) { - struct static_fw_load_mgr *static_loader = - &hdev->fw_loader.static_loader; + struct fw_load_mgr *fw_loader = &hdev->fw_loader; + u32 status, cpu_boot_status_reg, cpu_timeout; + struct static_fw_load_mgr *static_loader; + struct pre_fw_load_props *pre_fw_load; int rc; if (hdev->device_cpu_is_halted) @@ -1377,12 +1379,28 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev) /* Stop device CPU to make sure nothing bad happens */ if (hdev->asic_prop.dynamic_fw_load) { + pre_fw_load = &fw_loader->pre_fw_load; + cpu_timeout = fw_loader->cpu_timeout; + cpu_boot_status_reg = pre_fw_load->cpu_boot_status_reg; + rc = hl_fw_dynamic_send_protocol_cmd(hdev, &hdev->fw_loader, - COMMS_GOTO_WFE, 0, false, - hdev->fw_loader.cpu_timeout); - if (rc) + COMMS_GOTO_WFE, 0, false, cpu_timeout); + if (rc) { dev_err(hdev->dev, "Failed sending COMMS_GOTO_WFE\n"); + } else { + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + status == CPU_BOOT_STATUS_IN_WFE, + hdev->fw_poll_interval_usec, + cpu_timeout); + if (rc) + dev_err(hdev->dev, "Current status=%u. Timed-out updating to WFE\n", + status); + } } else { + static_loader = &hdev->fw_loader.static_loader; WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE); msleep(static_loader->cpu_reset_wait_msec); -- cgit v1.2.3 From 7d212963366e8113d1a8548a35eb9cd5df110dca Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Thu, 20 Apr 2023 14:17:31 +0300 Subject: accel/habanalabs: fix a static warning - 'dubious: x & !y' Use a straight forward approach to get a conditional result. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 058741698d71..240fecfab608 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -4527,7 +4527,7 @@ static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id]; reg_addr = reg_base + TPC_CFG_STALL_OFFSET; reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK, - !!(engine_command == HL_ENGINE_STALL)); + (engine_command == HL_ENGINE_STALL) ? 1 : 0); WREG32(reg_addr, reg_val); if (engine_command == HL_ENGINE_RESUME) { @@ -4551,7 +4551,7 @@ static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id]; reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET; reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK, - !!(engine_command == HL_ENGINE_STALL)); + (engine_command == HL_ENGINE_STALL) ? 1 : 0); WREG32(reg_addr, reg_val); return 0; @@ -4572,7 +4572,7 @@ static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u3 reg_base = gaudi2_dma_core_blocks_bases[edma_id]; reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET; reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, - !!(engine_command == HL_ENGINE_STALL)); + (engine_command == HL_ENGINE_STALL) ? 1 : 0); WREG32(reg_addr, reg_val); if (engine_command == HL_ENGINE_STALL) { -- cgit v1.2.3 From d0dcd4bbfa47f8726df5a45236c0edd0107ec5c3 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Mon, 24 Apr 2023 14:17:03 +0300 Subject: accel/habanalabs: always fetch pci addr_dec error info Due to missing indication of address decode source (LBW/HBW bus), we should always try and fetch extended information. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 240fecfab608..d21ef9997d05 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -8892,14 +8892,12 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]); error_count++; - switch (intr_cause_data & BIT_ULL(i)) { - case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: - hl_check_for_glbl_errors(hdev); - break; - case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: - gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); - break; - } + /* + * Always check for LBW and HBW additional info as the indication itself is + * sometimes missing + */ + hl_check_for_glbl_errors(hdev); + gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask); } return error_count; -- cgit v1.2.3 From cc1eeaa335f2d71e8bddf268e3e60d3a3c3a08bc Mon Sep 17 00:00:00 2001 From: Yang Li Date: Fri, 12 May 2023 14:46:55 +0800 Subject: accel/habanalabs: Fix some kernel-doc comments Make the description of @regs_range_array and @regs_range_array_size to @user_regs_range_array and @user_regs_range_array_size to silence the warnings: drivers/accel/habanalabs/common/security.c:506: warning: Function parameter or member 'user_regs_range_array' not described in 'hl_init_pb_ranges_single_dcore' drivers/accel/habanalabs/common/security.c:506: warning: Function parameter or member 'user_regs_range_array_size' not described in 'hl_init_pb_ranges_single_dcore' drivers/accel/habanalabs/common/security.c:506: warning: Excess function parameter 'regs_range_array' description in 'hl_init_pb_ranges_single_dcore' drivers/accel/habanalabs/common/security.c:506: warning: Excess function parameter 'regs_range_array_size' description in 'hl_init_pb_ranges_single_dcore' Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=4940 Signed-off-by: Yang Li Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/security.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c index 297e6e44fd0c..dc23ff57c91a 100644 --- a/drivers/accel/habanalabs/common/security.c +++ b/drivers/accel/habanalabs/common/security.c @@ -495,8 +495,8 @@ free_glbl_sec: * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_range_array: register range array - * @regs_range_array_size: register range array size + * @user_regs_range_array: register range array + * @user_regs_range_array_size: register range array size * */ int hl_init_pb_ranges_single_dcore(struct hl_device *hdev, u32 dcore_offset, -- cgit v1.2.3 From 9ec7639b5e124f273db20555cc38bd40057157b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 15 May 2023 13:32:18 +0300 Subject: accel/habanalabs: fix gaudi2_get_tpc_idle_status() return The gaudi2_get_tpc_idle_status() function returned the incorrect variable so it always returned true. Fixes: d85f0531b928 ("accel/habanalabs: break is_idle function into per-engine sub-routines") Signed-off-by: Dan Carpenter Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index d21ef9997d05..e900017f4ff7 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -7238,7 +7238,7 @@ static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 gaudi2_iterate_tpcs(hdev, &tpc_iter); - return tpc_idle_data.is_idle; + return *tpc_idle_data.is_idle; } static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, -- cgit v1.2.3 From 964234aba59b0a92de701f25d0d6da7a5ebb3e16 Mon Sep 17 00:00:00 2001 From: Koby Elbaz Date: Tue, 16 May 2023 09:56:18 +0300 Subject: accel/habanalabs: rename security functions related arguments Make the argument names specify the registers array represent registers that should be unsecured so the user can access them. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/security.c | 57 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c index dc23ff57c91a..fe913965dbad 100644 --- a/drivers/accel/habanalabs/common/security.c +++ b/drivers/accel/habanalabs/common/security.c @@ -284,14 +284,14 @@ void hl_secure_block(struct hl_device *hdev, * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_array: register array - * @regs_array_size: register array size + * @user_regs_array: unsecured register array + * @user_regs_array_size: unsecured register array size * @mask: enabled instances mask: 1- enabled, 0- disabled */ int hl_init_pb_with_mask(struct hl_device *hdev, u32 num_dcores, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const u32 *regs_array, u32 regs_array_size, u64 mask) + const u32 *user_regs_array, u32 user_regs_array_size, u64 mask) { int i, j; struct hl_block_glbl_sec *glbl_sec; @@ -303,8 +303,8 @@ int hl_init_pb_with_mask(struct hl_device *hdev, u32 num_dcores, return -ENOMEM; hl_secure_block(hdev, glbl_sec, blocks_array_size); - hl_unsecure_registers(hdev, regs_array, regs_array_size, 0, pb_blocks, - glbl_sec, blocks_array_size); + hl_unsecure_registers(hdev, user_regs_array, user_regs_array_size, 0, + pb_blocks, glbl_sec, blocks_array_size); /* Fill all blocks with the same configuration */ for (i = 0 ; i < num_dcores ; i++) { @@ -336,19 +336,19 @@ int hl_init_pb_with_mask(struct hl_device *hdev, u32 num_dcores, * @instance_offset: offset between instances * @pb_blocks: blocks array * @blocks_array_size: blocks array size - * @regs_array: register array - * @regs_array_size: register array size + * @user_regs_array: unsecured register array + * @user_regs_array_size: unsecured register array size * */ int hl_init_pb(struct hl_device *hdev, u32 num_dcores, u32 dcore_offset, u32 num_instances, u32 instance_offset, const u32 pb_blocks[], u32 blocks_array_size, - const u32 *regs_array, u32 regs_array_size) + const u32 *user_regs_array, u32 user_regs_array_size) { return hl_init_pb_with_mask(hdev, num_dcores, dcore_offset, num_instances, i