diff options
| author | Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> | 2023-01-17 10:27:17 +0100 |
|---|---|---|
| committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-01-19 11:07:01 +0100 |
| commit | 35b137630f08d913fc2e33df33ccc2570dff3f7d (patch) | |
| tree | fcd8da6b5ba0bff2dfcc40045a9bad7b61faefc1 | |
| parent | 6f84981772535e670e4e2df051a672af229b6694 (diff) | |
| download | linux-35b137630f08d913fc2e33df33ccc2570dff3f7d.tar.gz linux-35b137630f08d913fc2e33df33ccc2570dff3f7d.tar.bz2 linux-35b137630f08d913fc2e33df33ccc2570dff3f7d.zip | |
accel/ivpu: Introduce a new DRM driver for Intel VPU
VPU stands for Versatile Processing Unit and it's a CPU-integrated
inference accelerator for Computer Vision and Deep Learning
applications.
The VPU device consist of following components:
- Buttress - provides CPU to VPU integration, interrupt, frequency and
power management.
- Memory Management Unit (based on ARM MMU-600) - translates VPU to
host DMA addresses, isolates user workloads.
- RISC based microcontroller - executes firmware that provides job
execution API for the kernel-mode driver
- Neural Compute Subsystem (NCS) - does the actual work, provides
Compute and Copy engines.
- Network on Chip (NoC) - network fabric connecting all the components
This driver supports VPU IP v2.7 integrated into Intel Meteor Lake
client CPUs (14th generation).
Module sources are at drivers/accel/ivpu and module name is
"intel_vpu.ko".
This patch includes only very besic functionality:
- module, PCI device and IRQ initialization
- register definitions and low level register manipulation functions
- SET/GET_PARAM ioctls
- power up without firmware
Co-developed-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Signed-off-by: Krystian Pradzynski <krystian.pradzynski@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-2-jacek.lawrynowicz@linux.intel.com
| -rw-r--r-- | MAINTAINERS | 9 | ||||
| -rw-r--r-- | drivers/Makefile | 1 | ||||
| -rw-r--r-- | drivers/accel/Kconfig | 2 | ||||
| -rw-r--r-- | drivers/accel/Makefile | 3 | ||||
| -rw-r--r-- | drivers/accel/ivpu/Kconfig | 15 | ||||
| -rw-r--r-- | drivers/accel/ivpu/Makefile | 8 | ||||
| -rw-r--r-- | drivers/accel/ivpu/TODO | 11 | ||||
| -rw-r--r-- | drivers/accel/ivpu/ivpu_drv.c | 353 | ||||
| -rw-r--r-- | drivers/accel/ivpu/ivpu_drv.h | 162 | ||||
| -rw-r--r-- | drivers/accel/ivpu/ivpu_hw.h | 170 | ||||
| -rw-r--r-- | drivers/accel/ivpu/ivpu_hw_mtl.c | 1048 | ||||
| -rw-r--r-- | drivers/accel/ivpu/ivpu_hw_mtl_reg.h | 280 | ||||
| -rw-r--r-- | drivers/accel/ivpu/ivpu_hw_reg_io.h | 115 | ||||
| -rw-r--r-- | include/uapi/drm/ivpu_accel.h | 95 |
14 files changed, 2272 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 1bd2a1d3f14f..071f7b62a36e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6894,6 +6894,15 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/accel.git F: Documentation/accel/ F: drivers/accel/ +DRM ACCEL DRIVERS FOR INTEL VPU +M: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> +M: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> +L: dri-devel@lists.freedesktop.org +S: Supported +T: git git://anongit.freedesktop.org/drm/drm-misc +F: drivers/accel/ivpu/ +F: include/uapi/drm/ivpu_accel.h + DRM DRIVERS FOR ALLWINNER A10 M: Maxime Ripard <mripard@kernel.org> M: Chen-Yu Tsai <wens@csie.org> diff --git a/drivers/Makefile b/drivers/Makefile index bdf1c66141c9..f0972e2226c9 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -189,3 +189,4 @@ obj-$(CONFIG_COUNTER) += counter/ obj-$(CONFIG_MOST) += most/ obj-$(CONFIG_PECI) += peci/ obj-$(CONFIG_HTE) += hte/ +obj-$(CONFIG_DRM_ACCEL) += accel/
\ No newline at end of file diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig index c9ce849b2984..4989376e5938 100644 --- a/drivers/accel/Kconfig +++ b/drivers/accel/Kconfig @@ -22,3 +22,5 @@ menuconfig DRM_ACCEL major number than GPUs, and will be exposed to user-space using different device files, called accel/accel* (in /dev, sysfs and debugfs). + +source "drivers/accel/ivpu/Kconfig" diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile new file mode 100644 index 000000000000..b1036dbc0ba4 --- /dev/null +++ b/drivers/accel/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-y += ivpu/ diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig new file mode 100644 index 000000000000..9bdf168bf1d0 --- /dev/null +++ b/drivers/accel/ivpu/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_ACCEL_IVPU + tristate "Intel VPU for Meteor Lake and newer" + depends on DRM_ACCEL + depends on X86_64 && !UML + depends on PCI && PCI_MSI + select FW_LOADER + select SHMEM + help + Choose this option if you have a system that has an 14th generation Intel CPU + or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated + inference accelerator for Computer Vision and Deep Learning applications. + + If "M" is selected, the module will be called intel_vpu. diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile new file mode 100644 index 000000000000..2cc8394101cd --- /dev/null +++ b/drivers/accel/ivpu/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2023 Intel Corporation + +intel_vpu-y := \ + ivpu_drv.o \ + ivpu_hw_mtl.o + +obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o
\ No newline at end of file diff --git a/drivers/accel/ivpu/TODO b/drivers/accel/ivpu/TODO new file mode 100644 index 000000000000..9077217ae10f --- /dev/null +++ b/drivers/accel/ivpu/TODO @@ -0,0 +1,11 @@ +- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler() +- Implement support for BLOB IDs +- Add debugfs support to improve debugging and testing +- Add tracing events for performance debugging +- Implement HW based scheduling support +- Use syncobjs for submit/sync +- Refactor IPC protocol to improve message latency +- Implement BO cache and MADVISE IOCTL +- Add support for user allocated buffers using prime import and dma-buf heaps +- Refactor struct ivpu_bo to use struct drm_gem_shmem_object +- Add driver/device documentation diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c new file mode 100644 index 000000000000..1134fb0028ad --- /dev/null +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_ioctl.h> + +#include "ivpu_drv.h" +#include "ivpu_hw.h" + +#ifndef DRIVER_VERSION_STR +#define DRIVER_VERSION_STR __stringify(DRM_IVPU_DRIVER_MAJOR) "." \ + __stringify(DRM_IVPU_DRIVER_MINOR) "." +#endif + +static const struct drm_driver driver; + +int ivpu_dbg_mask; +module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); +MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); + +u8 ivpu_pll_min_ratio; +module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); +MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency"); + +u8 ivpu_pll_max_ratio = U8_MAX; +module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644); +MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency"); + +struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv) +{ + kref_get(&file_priv->ref); + return file_priv; +} + +static void file_priv_release(struct kref *ref) +{ + struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref); + + kfree(file_priv); +} + +void ivpu_file_priv_put(struct ivpu_file_priv **link) +{ + struct ivpu_file_priv *file_priv = *link; + + WARN_ON(!file_priv); + + *link = NULL; + kref_put(&file_priv->ref, file_priv_release); +} + +static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + struct drm_ivpu_param *args = data; + int ret = 0; + + switch (args->param) { + case DRM_IVPU_PARAM_DEVICE_ID: + args->value = pdev->device; + break; + case DRM_IVPU_PARAM_DEVICE_REVISION: + args->value = pdev->revision; + break; + case DRM_IVPU_PARAM_PLATFORM_TYPE: + args->value = vdev->platform; + break; + case DRM_IVPU_PARAM_CORE_CLOCK_RATE: + args->value = ivpu_hw_reg_pll_freq_get(vdev); + break; + case DRM_IVPU_PARAM_NUM_CONTEXTS: + args->value = ivpu_get_context_count(vdev); + break; + case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: + args->value = vdev->hw->ranges.user_low.start; + break; + case DRM_IVPU_PARAM_CONTEXT_PRIORITY: + args->value = file_priv->priority; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_param *args = data; + int ret = 0; + + switch (args->param) { + case DRM_IVPU_PARAM_CONTEXT_PRIORITY: + if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME) + file_priv->priority = args->value; + else + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int ivpu_open(struct drm_device *dev, struct drm_file *file) +{ + struct ivpu_device *vdev = to_ivpu_device(dev); + struct ivpu_file_priv *file_priv; + + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); + if (!file_priv) + return -ENOMEM; + + file_priv->vdev = vdev; + file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; + kref_init(&file_priv->ref); + + file->driver_priv = file_priv; + return 0; +} + +static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + + ivpu_file_priv_put(&file_priv); +} + +static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(IVPU_GET_PARAM, ivpu_get_param_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0), +}; + +int ivpu_shutdown(struct ivpu_device *vdev) +{ + int ret; + + ivpu_hw_irq_disable(vdev); + + ret = ivpu_hw_power_down(vdev); + if (ret) + ivpu_warn(vdev, "Failed to power down HW: %d\n", ret); + + return ret; +} + +static const struct file_operations ivpu_fops = { + .owner = THIS_MODULE, + .mmap = drm_gem_mmap, + DRM_ACCEL_FOPS, +}; + +static const struct drm_driver driver = { + .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL, + + .open = ivpu_open, + .postclose = ivpu_postclose, + + .ioctls = ivpu_drm_ioctls, + .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), + .fops = &ivpu_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRM_IVPU_DRIVER_MAJOR, + .minor = DRM_IVPU_DRIVER_MINOR, +}; + +static int ivpu_irq_init(struct ivpu_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + ivpu_err(vdev, "Failed to allocate a MSI IRQ: %d\n", ret); + return ret; + } + + vdev->irq = pci_irq_vector(pdev, 0); + + ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler, + IRQF_NO_AUTOEN, DRIVER_NAME, vdev); + if (ret) + ivpu_err(vdev, "Failed to request an IRQ %d\n", ret); + + return ret; +} + +static int ivpu_pci_init(struct ivpu_device *vdev) +{ + struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); + struct resource *bar0 = &pdev->resource[0]; + struct resource *bar4 = &pdev->resource[4]; + int ret; + + ivpu_dbg(vdev, MISC, "Mapping BAR0 (RegV) %pR\n", bar0); + vdev->regv = devm_ioremap_resource(vdev->drm.dev, bar0); + if (IS_ERR(vdev->regv)) { + ivpu_err(vdev, "Failed to map bar 0: %pe\n", vdev->regv); + return PTR_ERR(vdev->regv); + } + + ivpu_dbg(vdev, MISC, "Mapping BAR4 (RegB) %pR\n", bar4); + vdev->regb = devm_ioremap_resource(vdev->drm.dev, bar4); + if (IS_ERR(vdev->regb)) { + ivpu_err(vdev, "Failed to map bar 4: %pe\n", vdev->regb); + return PTR_ERR(vdev->regb); + } + + ret = dma_set_mask_and_coherent(vdev->drm.dev, DMA_BIT_MASK(38)); + if (ret) { + ivpu_err(vdev, "Failed to set DMA mask: %d\n", ret); + return ret; + } + + /* Clear any pending errors */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); + + ret = pcim_enable_device(pdev); + if (ret) { + ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret); + return ret; + } + + pci_set_master(pdev); + + return 0; +} + +static int ivpu_dev_init(struct ivpu_device *vdev) +{ + int ret; + + vdev->hw = drmm_kzalloc(&vdev->drm, sizeof(*vdev->hw), GFP_KERNEL); + if (!vdev->hw) + return -ENOMEM; + + vdev->hw->ops = &ivpu_hw_mtl_ops; + vdev->platform = IVPU_PLATFORM_INVALID; + vdev->context_xa_limit.min = IVPU_GLOBAL_CONTEXT_MMU_SSID + 1; + vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; + xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + + ret = ivpu_pci_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret); + goto err_xa_destroy; + } + + ret = ivpu_irq_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret); + goto err_xa_destroy; + } + + /* Init basic HW info based on buttress registers which are accessible before power up */ + ret = ivpu_hw_info_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret); + goto err_xa_destroy; + } + + /* Power up early so the rest of init code can access VPU registers */ + ret = ivpu_hw_power_up(vdev); + if (ret) { + ivpu_err(vdev, "Failed to power up HW: %d\n", ret); + goto err_xa_destroy; + } + + return 0; + +err_xa_destroy: + xa_destroy(&vdev->context_xa); + return ret; +} + +static void ivpu_dev_fini(struct ivpu_device *vdev) +{ + ivpu_shutdown(vdev); + + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); + xa_destroy(&vdev->context_xa); +} + +static struct pci_device_id ivpu_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_MTL) }, + { } +}; +MODULE_DEVICE_TABLE(pci, ivpu_pci_ids); + +static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct ivpu_device *vdev; + int ret; + + vdev = devm_drm_dev_alloc(&pdev->dev, &driver, struct ivpu_device, drm); + if (IS_ERR(vdev)) + return PTR_ERR(vdev); + + pci_set_drvdata(pdev, vdev); + + ret = ivpu_dev_init(vdev); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret); + return ret; + } + + ret = drm_dev_register(&vdev->drm, 0); + if (ret) { + dev_err(&pdev->dev, "Failed to register DRM device: %d\n", ret); + ivpu_dev_fini(vdev); + } + + return ret; +} + +static void ivpu_remove(struct pci_dev *pdev) +{ + struct ivpu_device *vdev = pci_get_drvdata(pdev); + + drm_dev_unregister(&vdev->drm); + ivpu_dev_fini(vdev); +} + +static struct pci_driver ivpu_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = ivpu_pci_ids, + .probe = ivpu_probe, + .remove = ivpu_remove, +}; + +module_pci_driver(ivpu_pci_driver); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); +MODULE_VERSION(DRIVER_VERSION_STR); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h new file mode 100644 index 000000000000..d0b006893b1c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_DRV_H__ +#define __IVPU_DRV_H__ + +#include <drm/drm_device.h> +#include <drm/drm_managed.h> +#include <drm/drm_mm.h> +#include <drm/drm_print.h> + +#include <linux/pci.h> +#include <linux/xarray.h> +#include <uapi/drm/ivpu_accel.h> + +#define DRIVER_NAME "intel_vpu" +#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)" +#define DRIVER_DATE "20230117" + +#define PCI_DEVICE_ID_MTL 0x7d1d + +#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 +#define IVPU_CONTEXT_LIMIT 64 +#define IVPU_NUM_ENGINES 2 + +#define IVPU_PLATFORM_SILICON 0 +#define IVPU_PLATFORM_SIMICS 2 +#define IVPU_PLATFORM_FPGA 3 +#define IVPU_PLATFORM_INVALID 8 + +#define IVPU_DBG_REG BIT(0) +#define IVPU_DBG_IRQ BIT(1) +#define IVPU_DBG_MMU BIT(2) +#define IVPU_DBG_FILE BIT(3) +#define IVPU_DBG_MISC BIT(4) +#define IVPU_DBG_FW_BOOT BIT(5) +#define IVPU_DBG_PM BIT(6) +#define IVPU_DBG_IPC BIT(7) +#define IVPU_DBG_BO BIT(8) +#define IVPU_DBG_JOB BIT(9) +#define IVPU_DBG_JSM BIT(10) +#define IVPU_DBG_KREF BIT(11) +#define IVPU_DBG_RPM BIT(12) + +#define ivpu_err(vdev, fmt, ...) \ + drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_err_ratelimited(vdev, fmt, ...) \ + drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_warn(vdev, fmt, ...) \ + drm_warn(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_warn_ratelimited(vdev, fmt, ...) \ + drm_err_ratelimited(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__) + +#define ivpu_info(vdev, fmt, ...) drm_info(&(vdev)->drm, fmt, ##__VA_ARGS__) + +#define ivpu_dbg(vdev, type, fmt, args...) do { \ + if (unlikely(IVPU_DBG_##type & ivpu_dbg_mask)) \ + dev_dbg((vdev)->drm.dev, "[%s] " fmt, #type, ##args); \ +} while (0) + +#define IVPU_WA(wa_name) (vdev->wa.wa_name) + +struct ivpu_wa_table { + bool punit_disabled; + bool clear_runtime_mem; +}; + +struct ivpu_hw_info; + +struct ivpu_device { + struct drm_device drm; + void __iomem *regb; + void __iomem *regv; + u32 platform; + u32 irq; + + struct ivpu_wa_table wa; + struct ivpu_hw_info *hw; + + struct xarray context_xa; + struct xa_limit context_xa_limit; + + struct { + int boot; + int jsm; + int tdr; + int reschedule_suspend; + } timeout; +}; + +/* + * file_priv has its own refcount (ref) that allows user space to close the fd + * without blocking even if VPU is still processing some jobs. + */ +struct ivpu_file_priv { + struct kref ref; + struct ivpu_device *vdev; + u32 priority; +}; + +extern int ivpu_dbg_mask; +extern u8 ivpu_pll_min_ratio; +extern u8 ivpu_pll_max_ratio; + +struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); +void ivpu_file_priv_put(struct ivpu_file_priv **link); +int ivpu_shutdown(struct ivpu_device *vdev); + +static inline bool ivpu_is_mtl(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->device == PCI_DEVICE_ID_MTL; +} + +static inline u8 ivpu_revision(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->revision; +} + +static inline u16 ivpu_device_id(struct ivpu_device *vdev) +{ + return to_pci_dev(vdev->drm.dev)->device; +} + +static inline struct ivpu_device *to_ivpu_device(struct drm_device *dev) +{ + return container_of(dev, struct ivpu_device, drm); +} + +static inline u32 ivpu_get_context_count(struct ivpu_device *vdev) +{ + struct xa_limit ctx_limit = vdev->context_xa_limit; + + return (ctx_limit.max - ctx_limit.min + 1); +} + +static inline u32 ivpu_get_platform(struct ivpu_device *vdev) +{ + WARN_ON_ONCE(vdev->platform == IVPU_PLATFORM_INVALID); + return vdev->platform; +} + +static inline bool ivpu_is_silicon(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_SILICON; +} + +static inline bool ivpu_is_simics(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_SIMICS; +} + +static inline bool ivpu_is_fpga(struct ivpu_device *vdev) +{ + return ivpu_get_platform(vdev) == IVPU_PLATFORM_FPGA; +} + +#endif /* __IVPU_DRV_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h new file mode 100644 index 000000000000..50a9304ab09c --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_HW_H__ +#define __IVPU_HW_H__ + +#include "ivpu_drv.h" + +struct ivpu_hw_ops { + int (*info_init)(struct ivpu_device *vdev); + int (*power_up)(struct ivpu_device *vdev); + int (*boot_fw)(struct ivpu_device *vdev); + int (*power_down)(struct ivpu_device *vdev); + bool (*is_idle)(struct ivpu_device *vdev); + void (*wdt_disable)(struct ivpu_device *vdev); + void (*diagnose_failure)(struct ivpu_device *vdev); + u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); + u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev); + void (*reg_db_set)(struct ivpu_device *vdev, u32 db_id); + u32 (*reg_ipc_rx_addr_get)(struct ivpu_device *vdev); + u32 (*reg_ipc_rx_count_get)(struct ivpu_device *vdev); + void (*reg_ipc_tx_set)(struct ivpu_device *vdev, u32 vpu_addr); + void (*irq_clear)(struct ivpu_device *vdev); + void (*irq_enable)(struct ivpu_device *vdev); + void (*irq_disable)(struct ivpu_device *vdev); + irqreturn_t (*irq_handler)(int irq, void *ptr); +}; + +struct ivpu_addr_range { + resource_size_t start; + resource_size_t end; +}; + +struct ivpu_hw_info { + const struct ivpu_hw_ops *ops; + struct { + struct ivpu_addr_range global_low; + struct ivpu_addr_range global_high; + struct ivpu_addr_range user_low; + struct ivpu_addr_range user_high; + struct ivpu_addr_range global_aliased_pio; + } ranges; + struct { + u8 min_ratio; + u8 max_ratio; + /* + * Pll ratio for the efficiency frequency. The VPU has optimum + * performance to power ratio at this frequency. + */ + u8 pn_ratio; + u32 profiling_freq; + } pll; + u32 tile_fuse; + u32 sku; + u16 config; +}; + +extern const struct ivpu_hw_ops ivpu_hw_mtl_ops; + +static inline int ivpu_hw_info_init(struct ivpu_device *vdev) +{ + return vdev->hw->ops->info_init(vdev); +}; + +static inline int ivpu_hw_power_up(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, PM, "HW power up\n"); + + return vdev->hw->ops->power_up(vdev); +}; + +static inline int ivpu_hw_boot_fw(struct ivpu_device *vdev) +{ + return vdev->hw->ops->boot_fw(vdev); +}; + +static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) +{ + return vdev->hw->ops->is_idle(vdev); +}; + +static inline int ivpu_hw_power_down(struct ivpu_device *vdev) +{ + ivpu_dbg(vdev, PM, "HW power down\n"); + + return vdev->hw->ops->power_down(vdev); +}; + +static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) +{ + vdev->hw->ops->wdt_disable(vdev); +}; + +/* Register indirect accesses */ +static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_pll_freq_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_offset_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_size_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_size_get(vdev); +}; + +static inline u32 ivpu_hw_reg_telemetry_enable_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_telemetry_enable_get(vdev); +}; + +static inline void ivpu_hw_reg_db_set(struct ivpu_device *vdev, u32 db_id) +{ + vdev->hw->ops->reg_db_set(vdev, db_id); +}; + +static inline u32 ivpu_hw_reg_ipc_rx_addr_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_ipc_rx_addr_get(vdev); +}; + +static inline u32 ivpu_hw_reg_ipc_rx_count_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->reg_ipc_rx_count_get(vdev); +}; + +static inline void ivpu_hw_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) +{ + vdev->hw->ops->reg_ipc_tx_set(vdev, vpu_addr); +}; + +static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_clear(vdev); +}; + +static inline void ivpu_hw_irq_enable(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_enable(vdev); +}; + +static inline void ivpu_hw_irq_disable(struct ivpu_device *vdev) +{ + vdev->hw->ops->irq_disable(vdev); +}; + +static inline void ivpu_hw_init_range(struct ivpu_addr_range *range, u64 start, u64 size) +{ + range->start = start; + range->end = start + size; +} + +static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range) +{ + return range->end - range->start; +} + +static inline void ivpu_hw_diagnose_failure(struct ivpu_device *vdev) +{ + vdev->hw->ops->diagnose_failure(vdev); +} + +#endif /* __IVPU_HW_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c new file mode 100644 index 000000000000..8a1b1c03f77b --- /dev/null +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -0,0 +1,1048 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include "ivpu_drv.h" +#include "ivpu_hw_mtl_reg.h" +#include "ivpu_hw_reg_io.h" +#include "ivpu_hw.h" + +#define TILE_FUSE_ENABLE_BOTH 0x0 +#define TILE_FUSE_ENABLE_UPPER 0x1 +#define TILE_FUSE_ENABLE_LOWER 0x2 + +#define TILE_SKU_BOTH_MTL 0x3630 +#define TILE_SKU_LOWER_MTL 0x3631 +#define TILE_SKU_UPPER_MTL 0x3632 + +/* Work point configuration values */ +#define WP_CONFIG_1_TILE_5_3_RATIO 0x0101 +#define WP_CONFIG_1_TILE_4_3_RATIO 0x0102 +#define WP_CONFIG_2_TILE_5_3_RATIO 0x0201 +#define WP_CONFIG_2_TILE_4_3_RATIO 0x0202 +#define WP_CONFIG_0_TILE_PLL_OFF 0x0000 + +#define PLL_REF_CLK_FREQ (50 * 1000000) +#define PLL_SIMULATION_FREQ (10 * 1000000) +#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ) +#define PLL_DEFAULT_EPP_VALUE 0x80 + +#define TIM_SAFE_ENABLE 0xf1d0dead +#define TIM_WATCHDOG_RESET_VALUE 0xffffffff + +#define TIMEOUT_US (150 * USEC_PER_MSEC) +#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) +#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC) + +#define ICB_0_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, MMU_IRQ_2_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_0_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_0, CPU_INT_REDIRECT_1_INT))) + +#define ICB_1_IRQ_MASK ((REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_2_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_3_INT)) | \ + (REG_FLD(MTL_VPU_HOST_SS_ICB_STATUS_1, CPU_INT_REDIRECT_4_INT))) + +#define ICB_0_1_IRQ_MASK ((((u64)ICB_1_IRQ_MASK) << 32) | ICB_0_IRQ_MASK) + +#define BUTTRESS_IRQ_MASK ((REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE)) | \ + (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, ATS_ERR)) | \ + (REG_FLD(MTL_BUTTRESS_INTERRUPT_STAT, UFI_ERR))) + +#define BUTTRESS_IRQ_ENABLE_MASK ((u32)~BUTTRESS_IRQ_MASK) +#define BUTTRESS_IRQ_DISABLE_MASK ((u32)-1) + +#define ITF_FIREWALL_VIOLATION_MASK ((REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_ROM_CMX)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_DBG)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, CSS_CTRL)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, DEC400)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_NCE)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ + (REG_FLD(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) + +static char *ivpu_platform_to_str(u32 platform) +{ + switch (platform) { + case IVPU_PLATFORM_SILICON: + return "IVPU_PLATFORM_SILICON"; + case IVPU_PLATFORM_SIMICS: + return "IVPU_PLATFORM_SIMICS"; + case IVPU_PLATFORM_FPGA: + return "IVPU_PLATFORM_FPGA"; + default: + return "Invalid platform"; + } +} + +static void ivpu_hw_read_platform(struct ivpu_device *vdev) +{ + u32 gen_ctrl = REGV_RD32(MTL_VPU_HOST_SS_GEN_CTRL); + u32 platform = REG_GET_FLD(MTL_VPU_HOST_SS_GEN_CTRL, PS, gen_ctrl); + + if (platform == IVPU_PLATFORM_SIMICS || platform == IVPU_PLATFORM_FPGA) + vdev->platform = platform; + else + vdev->platform = IVPU_PLATFORM_SILICON; + + ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", + ivpu_platform_to_str(vdev->platform), vdev->platform); +} + +static void ivpu_hw_wa_init(struct ivpu_device *vdev) +{ + vdev->wa.punit_disabled = ivpu_is_fpga(vdev); + vdev->wa.clear_runtime_mem = false; +} + +static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) +{ + if (ivpu_is_simics(vdev) || ivpu_is_fpga(vdev)) { + vdev->timeout.boot = 100000; + vdev->timeout.jsm = 50000; + vdev->timeout.tdr = 2000000; + vdev->timeout.reschedule_suspend = 1000; + } else { + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 2000; + vdev->timeout.reschedule_suspend = 10; + } +} + +static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) +{ + return REGB_POLL_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, 0, PLL_TIMEOUT_US); +} + +/* Send KMD initiated workpoint change */ +static int ivpu_pll_cmd_send(struct ivpu_device *vdev, u16 min_ratio, u16 max_ratio, + u16 target_ratio, u16 config) +{ + int ret; + u32 val; + + ret = ivpu_pll_wait_for_cmd_send(vdev); + if (ret) { + ivpu_err(vdev, "Failed to sync before WP request: %d\n", ret); + return ret; + } + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD0); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MIN_RATIO, min_ratio, val); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD0, MAX_RATIO, max_ratio, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD0, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD1); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, TARGET_RATIO, target_ratio, val); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD1, EPP, PLL_DEFAULT_EPP_VALUE, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD1, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_PAYLOAD2); + val = REG_SET_FLD_NUM(MTL_BUTTRESS_WP_REQ_PAYLOAD2, CONFIG, config, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_PAYLOAD2, val); + + val = REGB_RD32(MTL_BUTTRESS_WP_REQ_CMD); + val = REG_SET_FLD(MTL_BUTTRESS_WP_REQ_CMD, SEND, val); + REGB_WR32(MTL_BUTTRESS_WP_REQ_CMD, val); + + ret = ivpu_pll_wait_for_cmd_send(vdev); + if (ret) + ivpu_err(vdev, "Failed to sync after WP request: %d\n", ret); + + return ret; +} + +static int ivpu_pll_wait_for_lock(struct ivpu_device *vdev, bool enable) +{ + u32 exp_val = enable ? 0x1 : 0x0; + + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(MTL_BUTTRESS_PLL_STATUS, LOCK, exp_val, PLL_TIMEOUT_US); +} + +static int ivpu_pll_wait_for_status_ready(struct ivpu_device *vdev) +{ + if (IVPU_WA(punit_disabled)) + return 0; + + return REGB_POLL_FLD(MTL_BUTTRESS_VPU_STATUS, READY, 1, PLL_TIMEOUT_US); +} + +static void ivpu_pll_init_frequency_ratios(struct ivpu_devi |
