From 486e19795f2ee11f0334e2e3fcf8951d4981ff88 Mon Sep 17 00:00:00 2001 From: Yuri Nudelman Date: Thu, 3 Jun 2021 17:51:58 +0300 Subject: habanalabs: allow fail on inability to respect hint A new user flag is required to make memory map hint mandatory, in contrast to the current situation where it is best effort. This is due to the requirement to map certain data to specific pre-determined device virtual address ranges. Signed-off-by: Yuri Nudelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a47a731e4527..18765eb75b65 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -813,6 +813,7 @@ union hl_wait_cs_args { #define HL_MEM_CONTIGUOUS 0x1 #define HL_MEM_SHARED 0x2 #define HL_MEM_USERPTR 0x4 +#define HL_MEM_FORCE_HINT 0x8 struct hl_mem_in { union { -- cgit v1.2.3 From 215f0c1775d5506c8a833b5c85a77b5fb65bf26b Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 14 Jun 2021 22:18:41 +0300 Subject: habanalabs: add wait-for-multi-CS uAPI When user sends multiple CSs, waiting for each CS is not efficient as it involves many user-kernel context switches. In order to address this issue we add support to "wait on multiple CSs" using a new uAPI which can wait on maximum of 32 CSs. The new uAPI is defined using a new flag - WAIT_FOR_MULTI_CS - in the wait_for_cs IOCTL. The input parameters for this uAPI will be: @seq: user pointer to an array of up to 32 CS's sequence numbers. @seq_array_len: length of sequence array. @timeout_us: timeout for waiting for any CS. The output paramateres for this API will be: @status: multi CS ioctl completion status (dedicated status was added as well). @flags: bitmap of output flags of the CS. @cs_completion_map: bitmap for multi CS, if CS sequence that was placed in index N in input seq array has completed- the N-th bit in cs_completion_map will be 1, otherwise it will be 0. @timestamp_nsec: timestamp of the first completed CS Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 18765eb75b65..49c737c4a2f6 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -735,11 +735,18 @@ union hl_cs_args { #define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 + +#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 struct hl_wait_cs_in { union { struct { - /* Command submission sequence number */ + /* + * In case of wait_cs holds the CS sequence number. + * In case of wait for multi CS hold a user pointer to + * an array of CS sequence numbers + */ __u64 seq; /* Absolute timeout to wait for command submission * in microseconds @@ -767,12 +774,17 @@ struct hl_wait_cs_in { /* Context ID - Currently not in use */ __u32 ctx_id; + /* HL_WAIT_CS_FLAGS_* * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order * not to specify an interrupt id ,set mask to all 1s. */ __u32 flags; + + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 @@ -789,8 +801,15 @@ struct hl_wait_cs_out { __u32 status; /* HL_WAIT_CS_STATUS_FLAG* */ __u32 flags; - /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */ + /* + * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set + * for wait_cs: timestamp of CS completion + * for wait_multi_cs: timestamp of FIRST CS completion + */ __s64 timestamp_nsec; + /* multi CS completion bitmap */ + __u32 cs_completion_map; + __u32 pad; }; union hl_wait_cs_args { -- cgit v1.2.3 From dadf17abb7245d9556591d8cc78bf57462e3b20a Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Mon, 24 May 2021 18:09:22 +0300 Subject: habanalabs: add support for encapsulated signals reservation The signaling from within encapsulated OP capability is merged into the existing stream architecture, such that one can trigger multiple signaling from an encapsulated op, according to the time the event was done in the graph execution and avoid the need to wait for the whole encapsulated OP execution to be complete before the stream can signal. This commit implements only the reserve/unreserve part. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 110 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 97 insertions(+), 13 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 49c737c4a2f6..eca86c545916 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -628,12 +628,21 @@ struct hl_cs_chunk { __u64 cb_handle; /* Relevant only when HL_CS_FLAGS_WAIT or - * HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * HL_CS_FLAGS_COLLECTIVE_WAIT is set * This holds address of array of u64 values that contain - * signal CS sequence numbers. The wait described by this job - * will listen on all those signals (wait event per signal) + * signal CS sequence numbers. The wait described by + * this job will listen on all those signals + * (wait event per signal) */ __u64 signal_seq_arr; + + /* + * Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * along with HL_CS_FLAGS_ENCAP_SIGNALS. + * This is the CS sequence which has the encapsulated signals. + */ + __u64 encaps_signal_seq; }; /* Index of queue to put the CB on */ @@ -651,6 +660,17 @@ struct hl_cs_chunk { * Number of entries in signal_seq_arr */ __u32 num_signal_seq_arr; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set along + * with HL_CS_FLAGS_ENCAP_SIGNALS + * This set the signals range that the user want to wait for + * out of the whole reserved signals range. + * e.g if the signals range is 20, and user don't want + * to wait for signal 8, so he set this offset to 7, then + * he call the API again with 9 and so on till 20. + */ + __u32 encaps_signal_offset; }; /* HL_CS_CHUNK_FLAGS_* */ @@ -678,6 +698,28 @@ struct hl_cs_chunk { #define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 #define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 +/* + * The encapsulated signals CS is merged into the existing CS ioctls. + * In order to use this feature need to follow the below procedure: + * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY + * the output of this API will be the SOB offset from CFG_BASE. + * this address will be used to patch CB cmds to do the signaling for this + * SOB by incrementing it's value. + * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY + * CS type, note that this might fail if out-of-sync happened to the SOB + * value, in case other signaling request to the same SOB occurred between + * reserve-unreserve calls. + * 2. Use the staged CS to do the encapsulated signaling jobs. + * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset + * field. This offset allows app to wait on part of the reserved signals. + * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag + * to wait for the encapsulated signals. + */ +#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800 +#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 +#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 @@ -690,10 +732,35 @@ struct hl_cs_in { /* holds address of array of hl_cs_chunk for execution phase */ __u64 chunks_execute; - /* Sequence number of a staged submission CS - * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set - */ - __u64 seq; + union { + /* + * Sequence number of a staged submission CS + * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset. + */ + __u64 seq; + + /* + * Encapsulated signals handle id + * Valid for two flows: + * 1. CS with encapsulated signals: + * when HL_CS_FLAGS_STAGED_SUBMISSION and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * and HL_CS_FLAGS_ENCAP_SIGNALS are set. + * 2. unreserve signals: + * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set. + */ + __u32 encaps_sig_handle_id; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* Encapsulated signals number */ + __u32 encaps_signals_count; + + /* Encapsulated signals queue index (stream) */ + __u32 encaps_signals_q_idx; + }; + }; /* Number of chunks in restore phase array. Maximum number is * HL_MAX_JOBS_PER_CS @@ -718,14 +785,31 @@ struct hl_cs_in { }; struct hl_cs_out { + union { + /* + * seq holds the sequence number of the CS to pass to wait + * ioctl. All values are valid except for 0 and ULLONG_MAX + */ + __u64 seq; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* This is the resereved signal handle id */ + __u32 handle_id; + + /* This is the signals count */ + __u32 count; + }; + }; + + /* HL_CS_STATUS */ + __u32 status; + /* - * seq holds the sequence number of the CS to pass to wait ioctl. All - * values are valid except for 0 and ULLONG_MAX + * SOB base address offset + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ - __u64 seq; - /* HL_CS_STATUS_* */ - __u32 status; - __u32 pad; + __u32 sob_base_addr_offset; }; union hl_cs_args { -- cgit v1.2.3 From 5dc9ffaff1420676827c4054bc789a7710f2a272 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 15 Jul 2021 10:48:43 +0300 Subject: habanalabs: expose server type in INFO IOCTL Add the server type property to the hl_info_hw_ip_info structure that is exposed to the user via the INFO IOCTL. This is needed by the userspace s/w stack to know the connections map of the internal links that connect the ASIC among themselves inside the server. The F/W will tell us, as part of the NIC information, the server type that the GAUDI is located in. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 48 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index eca86c545916..6686b73a0834 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -279,6 +279,14 @@ enum hl_device_status { HL_DEVICE_STATUS_NEEDS_RESET }; +enum hl_server_type { + HL_SERVER_TYPE_UNKNOWN = 0, + HL_SERVER_GAUDI_HLS1 = 1, + HL_SERVER_GAUDI_HLS1H = 2, + HL_SERVER_GAUDI_TYPE1 = 3, + HL_SERVER_GAUDI_TYPE2 = 4 +}; + /* Opcode for management ioctl * * HW_IP_INFO - Receive information about different IP blocks in the @@ -337,17 +345,49 @@ enum hl_device_status { #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 +/** + * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC + * @sram_base_address: The first SRAM physical base address that is free to be + * used by the user. + * @dram_base_address: The first DRAM virtual or physical base address that is + * free to be used by the user. + * @dram_size: The DRAM size that is available to the user. + * @sram_size: The SRAM size that is available to the user. + * @num_of_events: The number of events that can be received from the f/w. This + * is needed so the user can what is the size of the h/w events + * array he needs to pass to the kernel when he wants to fetch + * the event counters. + * @device_id: PCI device ID of the ASIC. + * @module_id: Module ID of the ASIC for mezzanine cards in servers + * (From OCP spec). + * @first_available_interrupt_id: The first available interrupt ID for the user + * to be used when it works with user interrupts. + * @server_type: Server type that the Gaudi ASIC is currently installed in. + * The value is according to enum hl_server_type + * @cpld_version: CPLD version on the board. + * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler + * in some ASICs. + * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant + * for Goya/Gaudi only. + * @dram_enabled: Whether the DRAM is enabled. + * @cpucp_version: The CPUCP f/w version. + * @card_name: The card name as passed by the f/w. + * @dram_page_size: The DRAM physical page size. + */ struct hl_info_hw_ip_info { __u64 sram_base_address; __u64 dram_base_address; __u64 dram_size; __u32 sram_size; __u32 num_of_events; - __u32 device_id; /* PCI Device ID */ - __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */ + __u32 device_id; + __u32 module_id; __u32 reserved; __u16 first_available_interrupt_id; - __u16 reserved2; + __u16 server_type; __u32 cpld_version; __u32 psoc_pci_pll_nr; __u32 psoc_pci_pll_nf; @@ -358,7 +398,7 @@ struct hl_info_hw_ip_info { __u8 pad[2]; __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; - __u64 reserved3; + __u64 reserved2; __u64 dram_page_size; }; -- cgit v1.2.3 From 71731090ab17a208a58020e4b342fdfee280458a Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Mon, 16 Aug 2021 13:27:12 +0300 Subject: habanalabs: add "in device creation" status On init, the disabled state is cleared right before hw_init and that causes the device to report on "Operational" state before the device initialization is finished. Although the char device is not yet exposed to the user at this stage, the sysfs entries are exposed. This can cause errors in monitoring applications that use the sysfs entries. In order to avoid this, a new state "in device creation" is introduced to ne reported when the device is not disabled but is still in init flow. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 6686b73a0834..7cc2a0f3f2f5 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -276,7 +276,9 @@ enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, HL_DEVICE_STATUS_MALFUNCTION, - HL_DEVICE_STATUS_NEEDS_RESET + HL_DEVICE_STATUS_NEEDS_RESET, + HL_DEVICE_STATUS_IN_DEVICE_CREATION, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION }; enum hl_server_type { -- cgit v1.2.3