49 files changed, 2350 insertions, 831 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 94e9fb4cdd76..358ad56f6524 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -513,4 +513,5 @@ source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
 source "drivers/misc/uacce/Kconfig"
 source "drivers/misc/pvpanic/Kconfig"
+source "drivers/misc/mchp_pci1xxxx/Kconfig"
 endmenu
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 2be8542616dd..ac9b3e757ba1 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -60,4 +60,5 @@ obj-$(CONFIG_XILINX_SDFEC)	+= xilinx_sdfec.o
 obj-$(CONFIG_HISI_HIKEY_USB)	+= hisi_hikey_usb.o
 obj-$(CONFIG_HI6421V600_IRQ)	+= hi6421v600-irq.o
 obj-$(CONFIG_OPEN_DICE)		+= open-dice.o
-obj-$(CONFIG_VCPU_STALL_DETECTOR)	+= vcpu_stall_detector.o
-\ No newline at end of file
+obj-$(CONFIG_GP_PCI1XXXX)	+= mchp_pci1xxxx/
+obj-$(CONFIG_VCPU_STALL_DETECTOR)	+= vcpu_stall_detector.o
diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c
index 075f3a36d512..a58b7cb81d98 100644
--- a/drivers/misc/altera-stapl/altera.c
+++ b/drivers/misc/altera-stapl/altera.c
@@ -1014,7 +1014,7 @@ exit_done:
 			 * ...argument 0 is string ID
 			 */
 			count = strlen(msg_buff);
-			strlcpy(&msg_buff[count],
+			strscpy(&msg_buff[count],
 				&p[str_table + args[0]],
 				ALTERA_MESSAGE_LENGTH - count);
 			break;
@@ -2146,7 +2146,7 @@ static int altera_get_note(u8 *p, s32 program_size, s32 *offset,
 						&p[note_table + (8 * i) + 4])];
 
 				if (value != NULL)
-					strlcpy(value, value_ptr, vallen);
+					strscpy(value, value_ptr, vallen);
 
 			}
 		}
@@ -2162,13 +2162,13 @@ static int altera_get_note(u8 *p, s32 program_size, s32 *offset,
 			status = 0;
 
 			if (key != NULL)
-				strlcpy(key, &p[note_strings +
+				strscpy(key, &p[note_strings +
 						get_unaligned_be32(
 						&p[note_table + (8 * i)])],
 					keylen);
 
 			if (value != NULL)
-				strlcpy(value, &p[note_strings +
+				strscpy(value, &p[note_strings +
 						get_unaligned_be32(
 						&p[note_table + (8 * i) + 4])],
 					vallen);
diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c
index a16b99bdaa13..d4a96137728d 100644
--- a/drivers/misc/bcm-vk/bcm_vk_dev.c
+++ b/drivers/misc/bcm-vk/bcm_vk_dev.c
@@ -1339,7 +1339,7 @@ static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_drvdata(pdev, vk);
 
 	irq = pci_alloc_irq_vectors(pdev,
-				    1,
+				    VK_MSIX_IRQ_MIN_REQ,
 				    VK_MSIX_IRQ_MAX,
 				    PCI_IRQ_MSI | PCI_IRQ_MSIX);
 
@@ -1401,7 +1401,7 @@ static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		bcm_vk_tty_set_irq_enabled(vk, i);
 	}
 
-	id = ida_simple_get(&bcm_vk_ida, 0, 0, GFP_KERNEL);
+	id = ida_alloc(&bcm_vk_ida, GFP_KERNEL);
 	if (id < 0) {
 		err = id;
 		dev_err(dev, "unable to get id\n");
@@ -1500,7 +1500,7 @@ err_kfree_name:
 	misc_device->name = NULL;
 
 err_ida_remove:
-	ida_simple_remove(&bcm_vk_ida, id);
+	ida_free(&bcm_vk_ida, id);
 
 err_irq:
 	for (i = 0; i < vk->num_irqs; i++)
@@ -1573,7 +1573,7 @@ static void bcm_vk_remove(struct pci_dev *pdev)
 	if (misc_device->name) {
 		misc_deregister(misc_device);
 		kfree(misc_device->name);
-		ida_simple_remove(&bcm_vk_ida, vk->devid);
+		ida_free(&bcm_vk_ida, vk->devid);
 	}
 	for (i = 0; i < vk->num_irqs; i++)
 		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), vk);
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 60c829113299..2c64f55cf01f 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -280,22 +280,6 @@ void cxl_handle_fault(struct work_struct *fault_work)
 		mmput(mm);
 }
 
-static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
-{
-	struct mm_struct *mm;
-
-	mm = get_mem_context(ctx);
-	if (mm == NULL) {
-		pr_devel("cxl_prefault_one unable to get mm %i\n",
-			 pid_nr(ctx->pid));
-		return;
-	}
-
-	cxl_fault_segment(ctx, mm, ea);
-
-	mmput(mm);
-}
-
 static u64 next_segment(u64 ea, u64 vsid)
 {
 	if (vsid & SLB_VSID_B_1T)
@@ -306,23 +290,16 @@ static u64 next_segment(u64 ea, u64 vsid)
 	return ea + 1;
 }
 
-static void cxl_prefault_vma(struct cxl_context *ctx)
+static void cxl_prefault_vma(struct cxl_context *ctx, struct mm_struct *mm)
 {
 	u64 ea, last_esid = 0;
 	struct copro_slb slb;
+	VMA_ITERATOR(vmi, mm, 0);
 	struct vm_area_struct *vma;
 	int rc;
-	struct mm_struct *mm;
-
-	mm = get_mem_context(ctx);
-	if (mm == NULL) {
-		pr_devel("cxl_prefault_vm unable to get mm %i\n",
-			 pid_nr(ctx->pid));
-		return;
-	}
 
 	mmap_read_lock(mm);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		for (ea = vma->vm_start; ea < vma->vm_end;
 				ea = next_segment(ea, slb.vsid)) {
 			rc = copro_calculate_slb(mm, ea, &slb);
@@ -337,20 +314,28 @@ static void cxl_prefault_vma(struct cxl_context *ctx)
 		}
 	}
 	mmap_read_unlock(mm);
-
-	mmput(mm);
 }
 
 void cxl_prefault(struct cxl_context *ctx, u64 wed)
 {
+	struct mm_struct *mm = get_mem_context(ctx);
+
+	if (mm == NULL) {
+		pr_devel("cxl_prefault unable to get mm %i\n",
+			 pid_nr(ctx->pid));
+		return;
+	}
+
 	switch (ctx->afu->prefault_mode) {
 	case CXL_PREFAULT_WED:
-		cxl_prefault_one(ctx, wed);
+		cxl_fault_segment(ctx, mm, wed);
 		break;
 	case CXL_PREFAULT_ALL:
-		cxl_prefault_vma(ctx);
+		cxl_prefault_vma(ctx, mm);
 		break;
 	default:
 		break;
 	}
+
+	mmput(mm);
 }
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index 4a9445fea93d..8a841a75d893 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -136,7 +136,7 @@ static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info)
 	 && !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
 		return -ENODEV;
 
-	strlcpy(info->type, "eeprom", I2C_NAME_SIZE);
+	strscpy(info->type, "eeprom", I2C_NAME_SIZE);
 
 	return 0;
 }
diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c
index ada2a3af36d7..bb3ed352b95f 100644
--- a/drivers/misc/eeprom/idt_89hpesx.c
+++ b/drivers/misc/eeprom/idt_89hpesx.c
@@ -1075,7 +1075,7 @@ static const struct i2c_device_id *idt_ee_match_id(struct fwnode_handle *fwnode)
 		return NULL;
 
 	p = strchr(compatible, ',');
-	strlcpy(devname, p ? p + 1 : compatible, sizeof(devname));
+	strscpy(devname, p ? p + 1 : compatible, sizeof(devname));
 	/* Search through the device name */
 	while (id->name[0]) {
 		if (strcmp(devname, id->name) == 0)
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 5d9e3483b89d..7ff0b63c25e3 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1515,7 +1515,7 @@ static int fastrpc_get_info_from_dsp(struct fastrpc_user *fl, uint32_t *dsp_attr
 	args[1].ptr = (u64)(uintptr_t)&dsp_attr_buf[1];
 	args[1].length = dsp_attr_buf_len;
 	args[1].fd = -1;
-	fl->pd = 1;
+	fl->pd = USER_PD;
 
 	return fastrpc_internal_invoke(fl, true, FASTRPC_DSP_UTILITIES_HANDLE,
 				       FASTRPC_SCALARS(0, 1, 1), args);
diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig
index 861c81006c6d..bd01d0d940c0 100644
--- a/drivers/misc/habanalabs/Kconfig
+++ b/drivers/misc/habanalabs/Kconfig
@@ -10,6 +10,7 @@ config HABANA_AI
 	select HWMON
 	select DMA_SHARED_BUFFER
 	select CRC32
+	select FW_LOADER
 	help
 	  Enables PCIe card driver for Habana's AI Processors (AIP) that are
 	  designed to accelerate Deep Learning inference and training workloads.
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index b35d7000c86b..a48a9e0969ed 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o
 include $(src)/common/Makefile
 habanalabs-y += $(HL_COMMON_FILES)
 
-include $(src)/goya/Makefile
-habanalabs-y += $(HL_GOYA_FILES)
+include $(src)/gaudi2/Makefile
+habanalabs-y += $(HL_GAUDI2_FILES)
 
 include $(src)/gaudi/Makefile
 habanalabs-y += $(HL_GAUDI_FILES)
 
-include $(src)/gaudi2/Makefile
-habanalabs-y += $(HL_GAUDI2_FILES)
+include $(src)/goya/Makefile
+habanalabs-y += $(HL_GOYA_FILES)
 
 habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index b027f66f8bd4..2b332991ac6a 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -12,20 +12,18 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+#define CB_VA_POOL_SIZE		(4UL * SZ_1G)
+
 static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct hl_vm_va_block *va_block, *tmp;
-	dma_addr_t bus_addr;
-	u64 virt_addr;
 	u32 page_size = prop->pmmu.page_size;
-	s32 offset;
 	int rc;
 
 	if (!hdev->supports_cb_mapping) {
 		dev_err_ratelimited(hdev->dev,
-				"Cannot map CB because no VA range is allocated for CB mapping\n");
+				"Mapping a CB to the device's MMU is not supported\n");
 		return -EINVAL;
 	}
 
@@ -35,106 +33,51 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 		return -EINVAL;
 	}
 
-	INIT_LIST_HEAD(&cb->va_block_list);
-
-	for (bus_addr = cb->bus_address;
-			bus_addr < cb->bus_address + cb->size;
-			bus_addr += page_size) {
-
-		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
-		if (!virt_addr) {
-			dev_err(hdev->dev,
-				"Failed to allocate device virtual address for CB\n");
-			rc = -ENOMEM;
-			goto err_va_pool_free;
-		}
+	/* A CB that is already mapped needs no further work */
+	if (cb->is_mmu_mapped)
+		return 0;
 
-		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
-		if (!va_block) {
-			rc = -ENOMEM;
-			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
-			goto err_va_pool_free;
-		}
+	cb->roundup_size = roundup(cb->size, page_size);
 
-		va_block->start = virt_addr;
-		va_block->end = virt_addr + page_size - 1;
-		va_block->size = page_size;
-		list_add_tail(&va_block->node, &cb->va_block_list);
+	cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
+	if (!cb->virtual_addr) {
+		dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
+		return -ENOMEM;
 	}
 
-	mutex_lock(&ctx->mmu_lock);
-
-	bus_addr = cb->bus_address;
-	offset = 0;
-	list_for_each_entry(va_block, &cb->va_block_list, node) {
-		rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
-				va_block->size, list_is_last(&va_block->node,
-							&cb->va_block_list));
-		if (rc) {
-			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
-				va_block->start);
-			goto err_va_umap;
-		}
-
-		bus_addr += va_block->size;
-		offset += va_block->size;
+	mutex_lock(&hdev->mmu_lock);
+	rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
+		goto err_va_pool_free;
 	}
-
 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
-
-	mutex_unlock(&ctx->mmu_lock);
+	if (rc)
+		goto err_mmu_unmap;
+	mutex_unlock(&hdev->mmu_lock);
 
 	cb->is_mmu_mapped = true;
-
 	return rc;
 
-err_va_umap:
-	list_for_each_entry(va_block, &cb->va_block_list, node) {
-		if (offset <= 0)
-			break;
-		hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-				offset <= va_block->size);
-		offset -= va_block->size;
-	}
-
-	rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
-
-	mutex_unlock(&ctx->mmu_lock);
-
+err_mmu_unmap:
+	/* Cache invalidation failed: undo the mapping so nothing is leaked */
+	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
 err_va_pool_free:
-	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-		list_del(&va_block->node);
-		kfree(va_block);
-	}
-
+	mutex_unlock(&hdev->mmu_lock);
+	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
 	return rc;
 }
 
 static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 {
 	struct hl_device *hdev = ctx->hdev;
-	struct hl_vm_va_block *va_block, *tmp;
-
-	mutex_lock(&ctx->mmu_lock);
-
-	list_for_each_entry(va_block, &cb->va_block_list, node)
-		if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-				list_is_last(&va_block->node,
-						&cb->va_block_list)))
-			dev_warn_ratelimited(hdev->dev,
-					"Failed to unmap CB's va 0x%llx\n",
-					va_block->start);
 
+	mutex_lock(&hdev->mmu_lock);
+	hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
+	mutex_unlock(&hdev->mmu_lock);
 
-	mutex_unlock(&ctx->mmu_lock);
-
-	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-		list_del(&va_block->node);
-		kfree(va_block);
-	}
+	gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
 }
 
 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
@@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
 static int hl_cb_info(struct hl_mem_mgr *mmg,
 			u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
 {
-	struct hl_vm_va_block *va_block;
 	struct hl_cb *cb;
 	int rc = 0;
 
@@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
 	}
 
 	if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
-		va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
-		if (va_block) {
-			*device_va = va_block->start;
+		if (cb->is_mmu_mapped) {
+			*device_va = cb->virtual_addr;
 		} else {
 			dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
 			rc = -EINVAL;
@@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx)
 		return -ENOMEM;
 	}
 
-	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
-			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
+	ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
+					CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
+	if (!ctx->cb_va_pool_base) {
+		rc = -ENOMEM;
+		goto err_pool_destroy;
+	}
+	rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to add memory to VA gen pool for CB mapping\n");
-		goto err_pool_destroy;
+		goto err_unreserve_va_block;
 	}
 
 	return 0;
 
+err_unreserve_va_block:
+	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
 err_pool_destroy:
 	gen_pool_destroy(ctx->cb_va_pool);
 
@@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx)
 		return;
 
 	gen_pool_destroy(ctx->cb_va_pool);
+	hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
 }
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 90a4574cbe2d..fa05770865c6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -12,7 +12,9 @@
 #include <linux/slab.h>
 
 #define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
-					HL_CS_FLAGS_COLLECTIVE_WAIT)
+			HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
+			HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+
 
 #define MAX_TS_ITER_NUM 10
 
@@ -824,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
 	}
 
 	/* Save only the first CS timeout parameters */
-	rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
+	rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
 	if (rc) {
-		hdev->last_error.cs_timeout.timestamp = ktime_get();
-		hdev->last_error.cs_timeout.seq = cs->sequence;
+		hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
+		hdev->captured_err_info.cs_timeout.seq = cs->sequence;
 
 		event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
 				HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
@@ -1242,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
 		return CS_RESERVE_SIGNALS;
 	else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
 		return CS_UNRESERVE_SIGNALS;
+	else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+		return CS_TYPE_ENGINE_CORE;
 	else
 		return CS_TYPE_DEFAULT;
 }
@@ -1253,6 +1257,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 	u32 cs_type_flags, num_chunks;
 	enum hl_device_status status;
 	enum hl_cs_type cs_type;
+	bool is_sync_stream;
 
 	if (!hl_device_operational(hdev, &status)) {
 		return -EBUSY;
@@ -1276,9 +1281,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 	cs_type = hl_cs_get_cs_type(cs_type_flags);
 	num_chunks = args->in.num_chunks_execute;
 
-	if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
-			cs_type == CS_TYPE_COLLECTIVE_WAIT) &&
-			!hdev->supports_sync_stream)) {
+	is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
+			cs_type == CS_TYPE_COLLECTIVE_WAIT);
+
+	if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
 		dev_err(hdev->dev, "Sync stream CS is not supported\n");
 		return -EINVAL;
 	}
@@ -1288,7 +1294,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 			dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
 			return -EINVAL;
 		}
-	} else if (num_chunks != 1) {
+	} else if (is_sync_stream && num_chunks != 1) {
 		dev_err(hdev->dev,
 			"Sync stream CS mandates one chunk only, context %d\n",
 			ctx->asid);
@@ -1584,13 +1590,14 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
 	bool need_soft_reset = false;
-	int rc = 0, do_ctx_switch;
+	int rc = 0, do_ctx_switch = 0;
 	void __user *chunks;
 	u32 num_chunks, tmp;
 	u16 sob_count;
 	int ret;
 
-	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+	if (hdev->supports_ctx_switch)
+		do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
 
 	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
 		mutex_lock(&hpriv->restore_phase_mutex);
@@ -1661,9 +1668,10 @@ wait_again:
 			}
 		}
 
-		ctx->thread_ctx_switch_wait_token = 1;
+		if (hdev->supports_ctx_switch)
+			ctx->thread_ctx_switch_wait_token = 1;
 
-	} else if (!ctx->thread_ctx_switch_wait_token) {
+	} else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
 		rc = hl_poll_timeout_memory(hdev,
 			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
 			100, jiffies_to_usecs(hdev->timeout_jiffies), false);
@@ -2351,6 +2359,41 @@ out:
 	return rc;
 }
 
+static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
+						u32 num_engine_cores, u32 core_command)
+{
+	int rc;
+	struct hl_device *hdev = hpriv->hdev;
+	void __user *engine_cores_arr;
+	u32 *cores;
+
+	if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
+		dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
+		return -EINVAL;
+	}
+
+	if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
+		dev_err(hdev->dev, "Engine core command is invalid\n");
+		return -EINVAL;
+	}
+
+	engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
+	cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
+	if (!cores)
+		return -ENOMEM;
+
+	if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
+		dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
+		kfree(cores);
+		return -EFAULT;