From 37b82b5de77083ada0202da9001ecec9affe4b10 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 4 Apr 2012 16:39:58 -0400 Subject: arch/tile: introduce GXIO IORPC framework for tilegx The GXIO I/O RPC subsystem handles exporting I/O hardware resources to Linux and to applications running under Linux. For instance, memory which is made available for I/O DMA must be mapped by an I/O TLB; that means that such memory must be locked down by Linux, so that it is not swapped or otherwise reused, as long as those I/O TLB entries are active. Similarly, configuring direct hardware access introduces new validation requirements. If a user application registers memory, Linux must ensure that the supplied virtual addresses are valid, and turn them into client physical addresses. Similarly, when Linux then supplies those client physical addresses to the Tilera hypervisor, it must in turn validate those before turning them into the real physical addresses which are required by the hardware. To the extent that these sorts of activities were required on previous TILE architecture processors, they were implemented in a device-specific fashion. This meant that every I/O device had its own Tilera hypervisor driver, its own Linux driver, and in some cases its own user-level library support. There was a large amount of more-or-less functionally identical code in different places, particularly in the different Linux drivers. For TILE-Gx, this support has been generalized into a common framework, known as the I/O RPC framework or just IORPC. The two "gxio" directories (one for headers, one for sources) start with just a few files in each with this infrastructure commit, but after adding support for the on-board I/O shims for networking, PCI, USB, crypto, compression, I2CS, etc., there end up being about 20 files in each directory. More information on the IORPC framework is in the hv/iorpc.h header, included in this commit. 
Signed-off-by: Chris Metcalf --- arch/tile/Kconfig | 2 + arch/tile/Makefile | 2 + arch/tile/gxio/Kconfig | 5 + arch/tile/gxio/Makefile | 5 + arch/tile/gxio/iorpc_globals.c | 89 ++++ arch/tile/gxio/kiorpc.c | 61 +++ arch/tile/include/gxio/common.h | 40 ++ arch/tile/include/gxio/iorpc_globals.h | 38 ++ arch/tile/include/gxio/kiorpc.h | 29 ++ arch/tile/include/hv/iorpc.h | 714 +++++++++++++++++++++++++++++++++ 10 files changed, 985 insertions(+) create mode 100644 arch/tile/gxio/Kconfig create mode 100644 arch/tile/gxio/Makefile create mode 100644 arch/tile/gxio/iorpc_globals.c create mode 100644 arch/tile/gxio/kiorpc.c create mode 100644 arch/tile/include/gxio/common.h create mode 100644 arch/tile/include/gxio/iorpc_globals.h create mode 100644 arch/tile/include/gxio/kiorpc.h create mode 100644 arch/tile/include/hv/iorpc.h diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index fe128816c448..645979cfb718 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -345,6 +345,8 @@ config KERNEL_PL kernel will be built to run at. Generally you should use the default value here. +source "arch/tile/gxio/Kconfig" + endmenu # Tilera-specific configuration menu "Bus options" diff --git a/arch/tile/Makefile b/arch/tile/Makefile index e20b0a0b64a1..55640cf92597 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -59,6 +59,8 @@ libs-y += $(LIBGCC_PATH) # See arch/tile/Kbuild for content of core part of the kernel core-y += arch/tile/ +core-$(CONFIG_TILE_GXIO) += arch/tile/gxio/ + ifdef TILERA_ROOT INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot endif diff --git a/arch/tile/gxio/Kconfig b/arch/tile/gxio/Kconfig new file mode 100644 index 000000000000..8eff47fe1236 --- /dev/null +++ b/arch/tile/gxio/Kconfig @@ -0,0 +1,5 @@ +# Support direct access to TILE-Gx hardware from user space, via the +# gxio library, or from kernel space, via kernel IORPC support. 
+config TILE_GXIO + bool + depends on TILEGX diff --git a/arch/tile/gxio/Makefile b/arch/tile/gxio/Makefile new file mode 100644 index 000000000000..db1ee2863d8e --- /dev/null +++ b/arch/tile/gxio/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Tile-Gx device access support. +# + +obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o diff --git a/arch/tile/gxio/iorpc_globals.c b/arch/tile/gxio/iorpc_globals.c new file mode 100644 index 000000000000..e178e90805a2 --- /dev/null +++ b/arch/tile/gxio/iorpc_globals.c @@ -0,0 +1,89 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! 
*/ +#include "gxio/iorpc_globals.h" +/* Each stub below packs its arguments into a stack buffer for hv_dev_pread()/hv_dev_pwrite(). */ +struct arm_pollfd_param { + union iorpc_pollfd pollfd; +}; +/* Issue IORPC_OP_ARM_POLLFD on HV device fd; returns the hv_dev_pwrite() result. */ +int __iorpc_arm_pollfd(int fd, int pollfd_cookie) +{ + struct arm_pollfd_param temp; + struct arm_pollfd_param *params = &temp; + /* Fill in the kernel-format view of the pollfd union. */ + params->pollfd.kernel.cookie = pollfd_cookie; + + return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_ARM_POLLFD); +} + +EXPORT_SYMBOL(__iorpc_arm_pollfd); + +struct close_pollfd_param { + union iorpc_pollfd pollfd; +}; +/* Issue IORPC_OP_CLOSE_POLLFD on HV device fd; returns the hv_dev_pwrite() result. */ +int __iorpc_close_pollfd(int fd, int pollfd_cookie) +{ + struct close_pollfd_param temp; + struct close_pollfd_param *params = &temp; + /* Fill in the kernel-format view of the pollfd union. */ + params->pollfd.kernel.cookie = pollfd_cookie; + + return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_CLOSE_POLLFD); +} + +EXPORT_SYMBOL(__iorpc_close_pollfd); + +struct get_mmio_base_param { + HV_PTE base; +}; +/* Read the MMIO base PTE of HV device fd into *base; returns the hv_dev_pread() result. */ +int __iorpc_get_mmio_base(int fd, HV_PTE *base) +{ + int __result; + struct get_mmio_base_param temp; + struct get_mmio_base_param *params = &temp; + + __result = + hv_dev_pread(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_GET_MMIO_BASE); + *base = params->base; + /* NOTE(review): *base is stored even if the read failed; check the result first. */ + return __result; +} + +EXPORT_SYMBOL(__iorpc_get_mmio_base); + +struct check_mmio_offset_param { + unsigned long offset; + unsigned long size; +}; +/* Submit offset and size to HV device fd via IORPC_OP_CHECK_MMIO_OFFSET; returns the hv_dev_pwrite() result. */ +int __iorpc_check_mmio_offset(int fd, unsigned long offset, unsigned long size) +{ + struct check_mmio_offset_param temp; + struct check_mmio_offset_param *params = &temp; + + params->offset = offset; + params->size = size; + + return hv_dev_pwrite(fd, 0, (HV_VirtAddr) params, sizeof(*params), + IORPC_OP_CHECK_MMIO_OFFSET); +} + +EXPORT_SYMBOL(__iorpc_check_mmio_offset); diff --git a/arch/tile/gxio/kiorpc.c b/arch/tile/gxio/kiorpc.c new file mode 100644 index 000000000000..c8096aa5a3fc --- /dev/null +++ b/arch/tile/gxio/kiorpc.c @@ -0,0 +1,61 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * TILE-Gx IORPC support for kernel I/O drivers. + */ + +#include +#include +#include +#include +#include +/* TRACE() forwards to pr_info() when DEBUG_IORPC is defined; otherwise it expands to nothing. */ +#ifdef DEBUG_IORPC +#define TRACE(FMT, ...) pr_info(SIMPLE_MSG_LINE FMT, ## __VA_ARGS__) +#else +#define TRACE(...) +#endif +/* Create kernel-VA-space MMIO mapping for an on-chip IO device. */ +/* Returns the ioremap_prot() result, or NULL if either HV query reports an error. */ +void __iomem *iorpc_ioremap(int hv_fd, resource_size_t offset, + unsigned long size) +{ + pgprot_t mmio_base, prot = { 0 }; + unsigned long pfn; + int err; + + /* Look up the shim's lotar and base PA. */ + err = __iorpc_get_mmio_base(hv_fd, &mmio_base); + if (err) { + TRACE("get_mmio_base() failure: %d\n", err); + return NULL; + } + + /* Make sure the HV driver approves of our offset and size. */ + err = __iorpc_check_mmio_offset(hv_fd, offset, size); + if (err) { + TRACE("check_mmio_offset() failure: %d\n", err); + return NULL; + } + + /* + * mmio_base contains a base pfn and homing coordinates. Turn + * it into an MMIO pgprot and offset pfn. + */ + prot = hv_pte_set_lotar(prot, hv_pte_get_lotar(mmio_base)); + pfn = pte_pfn(mmio_base) + PFN_DOWN(offset); + /* NOTE(review): PFN_DOWN() presumably drops sub-page bits of offset; confirm callers page-align it. */ + return ioremap_prot(PFN_PHYS(pfn), size, prot); +} + +EXPORT_SYMBOL(iorpc_ioremap); diff --git a/arch/tile/include/gxio/common.h b/arch/tile/include/gxio/common.h new file mode 100644 index 000000000000..724595a24d04 --- /dev/null +++ b/arch/tile/include/gxio/common.h @@ -0,0 +1,40 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_COMMON_H_ +#define _GXIO_COMMON_H_ + +/* + * Routines shared between the various GXIO device components. + */ + +#include + +#include +#include +#include +/* Define the standard gxio MMIO functions using kernel functions. */ +/* The gxio write macros take (addr, val); the kernel accessors they wrap take (val, addr). */ +#define __gxio_mmio_read8(addr) readb(addr) +#define __gxio_mmio_read16(addr) readw(addr) +#define __gxio_mmio_read32(addr) readl(addr) +#define __gxio_mmio_read64(addr) readq(addr) +#define __gxio_mmio_write8(addr, val) writeb((val), (addr)) +#define __gxio_mmio_write16(addr, val) writew((val), (addr)) +#define __gxio_mmio_write32(addr, val) writel((val), (addr)) +#define __gxio_mmio_write64(addr, val) writeq((val), (addr)) +#define __gxio_mmio_read(addr) __gxio_mmio_read64(addr) +#define __gxio_mmio_write(addr, val) __gxio_mmio_write64((addr), (val)) +/* The unsized __gxio_mmio_read()/__gxio_mmio_write() forms above are the 64-bit accessors. */ +#endif /* !_GXIO_COMMON_H_ */ diff --git a/arch/tile/include/gxio/iorpc_globals.h b/arch/tile/include/gxio/iorpc_globals.h new file mode 100644 index 000000000000..52c721f8dad9 --- /dev/null +++ b/arch/tile/include/gxio/iorpc_globals.h @@ -0,0 +1,38 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! */ +#ifndef __IORPC_LINUX_RPC_H__ +#define __IORPC_LINUX_RPC_H__ + +#include + +#include +#include +#include +/* Opcodes for the global iorpc calls: IORPC_OPCODE(prefix-struct format, code). */ +#define IORPC_OP_ARM_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9000) +#define IORPC_OP_CLOSE_POLLFD IORPC_OPCODE(IORPC_FORMAT_KERNEL_POLLFD, 0x9001) +#define IORPC_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000) +#define IORPC_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001) +/* Issue IORPC_OP_ARM_POLLFD with the given cookie; returns the hv_dev_pwrite() result. */ +int __iorpc_arm_pollfd(int fd, int pollfd_cookie); +/* Issue IORPC_OP_CLOSE_POLLFD with the given cookie; returns the hv_dev_pwrite() result. */ +int __iorpc_close_pollfd(int fd, int pollfd_cookie); +/* Read the device MMIO base PTE into *base; returns the hv_dev_pread() result. */ +int __iorpc_get_mmio_base(int fd, HV_PTE *base); +/* Submit an MMIO offset and size to the HV driver for checking; returns the hv_dev_pwrite() result. */ +int __iorpc_check_mmio_offset(int fd, unsigned long offset, unsigned long size); + +#endif /* !__IORPC_LINUX_RPC_H__ */ diff --git a/arch/tile/include/gxio/kiorpc.h b/arch/tile/include/gxio/kiorpc.h new file mode 100644 index 000000000000..ee5820979ff3 --- /dev/null +++ b/arch/tile/include/gxio/kiorpc.h @@ -0,0 +1,29 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * Support routines for kernel IORPC drivers. 
+ */ + +#ifndef _GXIO_KIORPC_H +#define _GXIO_KIORPC_H + +#include +#include +#include +/* Create a kernel-VA-space MMIO mapping for an on-chip IO device; returns NULL if an HV check fails. */ +#if CHIP_HAS_MMIO() +void __iomem *iorpc_ioremap(int hv_fd, resource_size_t offset, + unsigned long size); +#endif + +#endif /* _GXIO_KIORPC_H */ diff --git a/arch/tile/include/hv/iorpc.h b/arch/tile/include/hv/iorpc.h new file mode 100644 index 000000000000..89c72a5d9341 --- /dev/null +++ b/arch/tile/include/hv/iorpc.h @@ -0,0 +1,714 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ +#ifndef _HV_IORPC_H_ +#define _HV_IORPC_H_ + +/** + * + * Error codes and struct definitions for the IO RPC library. + * + * The hypervisor's IO RPC component provides a convenient way for + * driver authors to proxy system calls between user space, Linux, and + * the hypervisor driver. The core of the system is a set of Python + * files that take ".idl" files as input and generate the following + * source code: + * + * - _rpc_call() routines for use in userspace IO libraries. These + * routines take an argument list specified in the .idl file, pack the + * arguments into a buffer, and read or write that buffer via the + * Linux iorpc driver. + * + * - dispatch_read() and dispatch_write() routines that hypervisor + * drivers can use to implement most of their dev_pread() and + * dev_pwrite() methods. These routines decode the incoming parameter + * blob, permission check and translate parameters where appropriate, + * and then invoke a callback routine for whichever RPC call has + * arrived. 
The driver simply implements the set of callback + * routines. + * + * The IO RPC system also includes the Linux 'iorpc' driver, which + * proxies calls between the userspace library and the hypervisor + * driver. The Linux driver is almost entirely device agnostic; it + * watches for special flags indicating cases where a memory buffer + * address might need to be translated, etc. As a result, driver + * writers can avoid many of the problem cases related to registering + * hardware resources like memory pages or interrupts. However, the + * drivers must be careful to obey the conventions documented below in + * order to work properly with the generic Linux iorpc driver. + * + * @section iorpc_domains Service Domains + * + * All iorpc-based drivers must support a notion of service domains. + * A service domain is basically an application context - state + * indicating resources that are allocated to that particular app + * which it may access and (perhaps) other applications may not + * access. Drivers can support any number of service domains they + * choose. In some cases the design is limited by a number of service + * domains supported by the IO hardware; in other cases the service + * domains are a purely software concept and the driver chooses a + * maximum number of domains based on how much state memory it is + * willing to preallocate. + * + * For example, the mPIPE driver only supports as many service domains + * as are supported by the mPIPE hardware. This limitation is + * required because the hardware implements its own MMIO protection + * scheme to allow large MMIO mappings while still protecting small + * register ranges within the page that should only be accessed by the + * hypervisor. + * + * In contrast, drivers with no hardware service domain limitations + * (for instance the TRIO shim) can implement an arbitrary number of + * service domains. 
In these cases, each service domain is limited to + * a carefully restricted set of legal MMIO addresses if necessary to + * keep one application from corrupting another application's state. + * + * @section iorpc_conventions System Call Conventions + * + * The driver's open routine is responsible for allocating a new + * service domain for each hv_dev_open() call. By convention, the + * return value from open() should be the service domain number on + * success, or GXIO_ERR_NO_SVC_DOM if no more service domains are + * available. + * + * The implementations of hv_dev_pread() and hv_dev_pwrite() are + * responsible for validating the devhdl value passed up by the + * client. Since the device handle returned by hv_dev_open() should + * embed the positive service domain number, drivers should make sure + * that DRV_HDL2BITS(devhdl) is a legal service domain. If the client + * passes an illegal service domain number, the routine should return + * GXIO_ERR_INVAL_SVC_DOM. Once the service domain number has been + * validated, the driver can copy to/from the client buffer and call + * the dispatch_read() or dispatch_write() methods created by the RPC + * generator. + * + * The hv_dev_close() implementation should reset all service domain + * state and put the service domain back on a free list for + * reallocation by a future application. In most cases, this will + * require executing a hardware reset or drain flow and denying any + * MMIO regions that were created for the service domain. + * + * @section iorpc_data Special Data Types + * + * The .idl file syntax allows the creation of syscalls with special + * parameters that require permission checks or translations as part + * of the system call path. Because of limitations in the code + * generator, APIs are generally limited to just one of these special + * parameters per system call, and they are sometimes required to be + * the first or last parameter to the call. 
Special parameters + * include: + * + * @subsection iorpc_mem_buffer MEM_BUFFER + * + * The MEM_BUFFER() datatype allows user space to "register" memory + * buffers with a device. Registering memory accomplishes two tasks: + * Linux keeps track of all buffers that might be modified by a + * hardware device, and the hardware device drivers bind registered + * buffers to particular hardware resources like ingress NotifRings. + * The MEM_BUFFER() idl syntax can take extra flags like ALIGN_64KB, + * ALIGN_SELF_SIZE, and FLAGS indicating that memory buffers must have + * certain alignment or that the user should be able to pass a "memory + * flags" word specifying attributes like nt_hint or IO cache pinning. + * The parser will accept multiple MEM_BUFFER() flags. + * + * Implementations must obey the following conventions when + * registering memory buffers via the iorpc flow. These rules are a + * result of the Linux driver implementation, which needs to keep + * track of how many times a particular page has been registered with + * the hardware so that it can release the page when all those + * registrations are cleared. + * + * - Memory registrations that refer to a resource which has already + * been bound must return GXIO_ERR_ALREADY_INIT. Thus, it is an + * error to register memory twice without resetting (i.e. closing) the + * resource in between. This convention keeps the Linux driver from + * having to track which particular devices a page is bound to. + * + * - At present, a memory registration is only cleared when the + * service domain is reset. In this case, the Linux driver simply + * closes the HV device file handle and then decrements the reference + * counts of all pages that were previously registered with the + * device. + * + * - In the future, we may add a mechanism for unregistering memory. + * One possible implementation would require that the user specify + * which buffer is currently registered. 
The HV would then verify + * that that page was actually the one currently mapped and return + * success or failure to Linux, which would then only decrement the + * page reference count if the addresses were mapped. Another scheme + * might allow Linux to pass a token to the HV to be returned when the + * resource is unmapped. + * + * @subsection iorpc_interrupt INTERRUPT + * + * The INTERRUPT .idl datatype allows the client to bind hardware + * interrupts to a particular combination of IPI parameters - CPU, IPI + * PL, and event bit number. This data is passed via a special + * datatype so that the Linux driver can validate the CPU and PL and + * the HV generic iorpc code can translate client CPUs to real CPUs. + * + * @subsection iorpc_pollfd_setup POLLFD_SETUP + * + * The POLLFD_SETUP .idl datatype allows the client to set up hardware + * interrupt bindings which are received by Linux but which are made + * visible to user processes as state transitions on a file descriptor; + * this allows user processes to use Linux primitives, such as poll(), to + * await particular hardware events. This data is passed via a special + * datatype so that the Linux driver may recognize the pollable file + * descriptor and translate it to a set of interrupt target information, + * and so that the HV generic iorpc code can translate client CPUs to real + * CPUs. + * + * @subsection iorpc_pollfd POLLFD + * + * The POLLFD .idl datatype allows manipulation of hardware interrupt + * bindings set up via the POLLFD_SETUP datatype; common operations are + * resetting the state of the requested interrupt events, and unbinding any + * bound interrupts. This data is passed via a special datatype so that + * the Linux driver may recognize the pollable file descriptor and + * translate it to an interrupt identifier previously supplied by the + * hypervisor as the result of an earlier pollfd_setup operation. 
+ * + * @subsection iorpc_blob BLOB + * + * The BLOB .idl datatype allows the client to write an arbitrary + * length string of bytes up to the hypervisor driver. This can be + * useful for passing up large, arbitrarily structured data like + * classifier programs. The iorpc stack takes care of validating the + * buffer VA and CPA as the data passes up to the hypervisor. Unlike + * MEM_BUFFER(), the buffer is not registered - Linux does not bump + * page refcounts and the HV driver should not reuse the buffer once + * the system call is complete. + * + * @section iorpc_translation Translating User Space Calls + * + * The ::iorpc_offset structure describes the formatting of the offset + * that is passed to pread() or pwrite() as part of the generated RPC code. + * When the user calls up to Linux, the rpc code fills in all the fields of + * the offset, including a 16-bit opcode, a 16 bit format indicator, and 32 + * bits of user-specified "sub-offset". The opcode indicates which syscall + * is being requested. The format indicates whether there is a "prefix + * struct" at the start of the memory buffer passed to pwrite(), and if so + * what data is in that prefix struct. These prefix structs are used to + * implement special datatypes like MEM_BUFFER() and INTERRUPT - we arrange + * to put data that needs translation and permission checks at the start of + * the buffer so that the Linux driver and generic portions of the HV iorpc + * code can easily access the data. The 32 bits of user-specified + * "sub-offset" are most useful for pread() calls where the user needs to + * also pass in a few bits indicating which register to read, etc. + * + * The Linux iorpc driver watches for system calls that contain prefix + * structs so that it can translate parameters and bump reference + * counts as appropriate. 
It does not (currently) have any knowledge + * of the per-device opcodes - it doesn't care what operation you're + * doing to mPIPE, so long as it can do all the generic book-keeping. + * The hv/iorpc.h header file defines all of the generic encoding bits + * needed to translate iorpc calls without knowing which particular + * opcode is being issued. + * + * @section iorpc_globals Global iorpc Calls + * + * Implementing mmap() required adding some special iorpc syscalls + * that are only called by the Linux driver, never by userspace. + * These include get_mmio_base() and check_mmio_offset(). These + * routines are described in globals.idl and must be included in every + * iorpc driver. By providing these routines in every driver, Linux's + * mmap implementation can easily get the PTE bits it needs and + * validate the PA offset without needing to know the per-device + * opcodes to perform those tasks. + * + * @section iorpc_kernel Supporting gxio APIs in the Kernel + * + * The iorpc code generator also supports generation of kernel code + * implementing the gxio APIs. This capability is currently used by + * the mPIPE network driver, and will likely be used by the TRIO root + * complex and endpoint drivers and perhaps an in-kernel crypto + * driver. Each driver that wants to instantiate iorpc calls in the + * kernel needs to generate a kernel version of the generated rpc code + * and (probably) copy any related gxio source files into the kernel. + * The mPIPE driver provides a good example of this pattern. + */ + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#if defined(__HV__) +#include +#elif defined(__KERNEL__) +#include "hypervisor.h" +#include +#else +#include +#endif + + +/** Code indicating translation services required within the RPC path. + * These indicate whether there is a translatable struct at the start + * of the RPC buffer and what information that struct contains. 
+ */ +enum iorpc_format_e +{ + /** No translation required, no prefix struct. */ + IORPC_FORMAT_NONE, + + /** No translation required, no prefix struct, no access to this + * operation from user space. */ + IORPC_FORMAT_NONE_NOUSER, + + /** Prefix struct contains user VA and size. */ + IORPC_FORMAT_USER_MEM, + + /** Prefix struct contains CPA, size, and homing bits. */ + IORPC_FORMAT_KERNEL_MEM, + + /** Prefix struct contains interrupt. */ + IORPC_FORMAT_KERNEL_INTERRUPT, + + /** Prefix struct contains user-level interrupt. */ + IORPC_FORMAT_USER_INTERRUPT, + + /** Prefix struct contains pollfd_setup (interrupt information). */ + IORPC_FORMAT_KERNEL_POLLFD_SETUP, + + /** Prefix struct contains user-level pollfd_setup (file descriptor). */ + IORPC_FORMAT_USER_POLLFD_SETUP, + + /** Prefix struct contains pollfd (interrupt cookie). */ + IORPC_FORMAT_KERNEL_POLLFD, + + /** Prefix struct contains user-level pollfd (file descriptor). */ + IORPC_FORMAT_USER_POLLFD, +}; + + +/** Generate an opcode given format and code. */ +#define IORPC_OPCODE(FORMAT, CODE) (((FORMAT) << 16) | (CODE)) + +/** The offset passed through the read() and write() system calls + combines an opcode with 32 bits of user-specified offset. */ +union iorpc_offset +{ +#ifndef __BIG_ENDIAN__ + uint64_t offset; /**< All bits. */ + + struct + { + uint16_t code; /**< RPC code. */ + uint16_t format; /**< iorpc_format_e */ + uint32_t sub_offset; /**< caller-specified offset. */ + }; + + uint32_t opcode; /**< Opcode combines code & format. */ +#else + uint64_t offset; /**< All bits. */ + + struct + { + uint32_t sub_offset; /**< caller-specified offset. */ + uint16_t format; /**< iorpc_format_e */ + uint16_t code; /**< RPC code. */ + }; + + struct + { + uint32_t padding; + uint32_t opcode; /**< Opcode combines code & format. */ + }; +#endif +}; + + +/** Homing and cache hinting bits that can be used by IO devices. */ +struct iorpc_mem_attr +{ + unsigned int lotar_x:4; /**< lotar X bits (or Gx page_mask). 
*/ + unsigned int lotar_y:4; /**< lotar Y bits (or Gx page_offset). */ + unsigned int hfh:1; /**< Uses hash-for-home. */ + unsigned int nt_hint:1; /**< Non-temporal hint. */ + unsigned int io_pin:1; /**< Only fill 'IO' cache ways. */ +}; + +/** Set the nt_hint bit. */ +#define IORPC_MEM_BUFFER_FLAG_NT_HINT (1 << 0) + +/** Set the IO pin bit. */ +#define IORPC_MEM_BUFFER_FLAG_IO_PIN (1 << 1) + + +/** A structure used to describe memory registration. Different + protection levels describe memory differently, so this union + contains all the different possible descriptions. As a request + moves up the call chain, each layer translates from one + description format to the next. In particular, the Linux iorpc + driver translates user VAs into CPAs and homing parameters. */ +union iorpc_mem_buffer +{ + struct + { + uint64_t va; /**< User virtual address. */ + uint64_t size; /**< Buffer size. */ + unsigned int flags; /**< nt_hint, IO pin. */ + } + user; /**< Buffer as described by user apps. */ + + struct + { + unsigned long long cpa; /**< Client physical address. */ +#if defined(__KERNEL__) || defined(__HV__) + size_t size; /**< Buffer size. */ + HV_PTE pte; /**< PTE describing memory homing. */ +#else + uint64_t size; + uint64_t pte; +#endif + unsigned int flags; /**< nt_hint, IO pin. */ + } + kernel; /**< Buffer as described by kernel. */ + + struct + { + unsigned long long pa; /**< Physical address. */ + size_t size; /**< Buffer size. */ + struct iorpc_mem_attr attr; /**< Homing and locality hint bits. */ + } + hv; /**< Buffer parameters for HV driver. */ +}; + + +/** A structure used to describe interrupts. The format differs slightly + * for user and kernel interrupts. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_interrupt +{ + struct + { + int cpu; /**< CPU. */ + int event; /**< evt_num */ + } + user; /**< Interrupt as described by user applications. */ + + struct + { + int x; /**< X coord. 
*/ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< Interrupt as described by the kernel. */ + +}; + + +/** A structure used to describe interrupts used with poll(). The format + * differs significantly for requests from user to kernel, and kernel to + * hypervisor. As with the mem_buffer_t, translation between the formats + * is done at each level. */ +union iorpc_pollfd_setup +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd_setup as described by user applications. */ + + struct + { + int x; /**< X coord. */ + int y; /**< Y coord. */ + int ipi; /**< int_num */ + int event; /**< evt_num */ + } + kernel; /**< pollfd_setup as described by the kernel. */ + +}; + + +/** A structure used to describe previously set up interrupts used with + * poll(). The format differs significantly for requests from user to + * kernel, and kernel to hypervisor. As with the mem_buffer_t, translation + * between the formats is done at each level. */ +union iorpc_pollfd +{ + struct + { + int fd; /**< Pollable file descriptor. */ + } + user; /**< pollfd as described by user applications. */ + + struct + { + int cookie; /**< hv cookie returned by the pollfd_setup operation. */ + } + kernel; /**< pollfd as described by the kernel. */ + +}; + + +/** The various iorpc devices use error codes from -1100 to -1299. + * + * This range is distinct from netio (-700 to -799), the hypervisor + * (-800 to -899), tilepci (-900 to -999), ilib (-1000 to -1099), + * gxcr (-1300 to -1399) and gxpci (-1400 to -1499). + */ +enum gxio_err_e { + + /** Largest iorpc error number. */ + GXIO_ERR_MAX = -1101, + + + /********************************************************/ + /* Generic Error Codes */ + /********************************************************/ + + /** Bad RPC opcode - possible version incompatibility. */ + GXIO_ERR_OPCODE = -1101, + + /** Invalid parameter. 
*/ + GXIO_ERR_INVAL = -1102, + + /** Memory buffer did not meet alignment requirements. */ + GXIO_ERR_ALIGNMENT = -1103, + + /** Memory buffers must be coherent and cacheable. */ + GXIO_ERR_COHERENCE = -1104, + + /** Resource already initialized. */ + GXIO_ERR_ALREADY_INIT = -1105, + + /** No service domains available. */ + GXIO_ERR_NO_SVC_DOM = -1106, + + /** Illegal service domain number. */ + GXIO_ERR_INVAL_SVC_DOM = -1107, + + /** Illegal MMIO address. */ + GXIO_ERR_MMIO_ADDRESS = -1108, + + /** Illegal interrupt binding. */ + GXIO_ERR_INTERRUPT = -1109, + + /** Unreasonable client memory. */ + GXIO_ERR_CLIENT_MEMORY = -1110, + + /** No more IOTLB entries. */ + GXIO_ERR_IOTLB_ENTRY = -1111, + + /** Invalid memory size. */ + GXIO_ERR_INVAL_MEMORY_SIZE = -1112, + + /** Unsupported operation. */ + GXIO_ERR_UNSUPPORTED_OP = -1113, + + /** Insufficient DMA credits. */ + GXIO_ERR_DMA_CREDITS = -1114, + + /** Operation timed out. */ + GXIO_ERR_TIMEOUT = -1115, + + /** No such device or object. */ + GXIO_ERR_NO_DEVICE = -1116, + + /** Device or resource busy. */ + GXIO_ERR_BUSY = -1117, + + /** I/O error. */ + GXIO_ERR_IO = -1118, + + /** Permissions error. */ + GXIO_ERR_PERM = -1119, + + + + /********************************************************/ + /* Test Device Error Codes */ + /********************************************************/ + + /** Illegal register number. */ + GXIO_TEST_ERR_REG_NUMBER = -1120, + + /** Illegal buffer slot. */ + GXIO_TEST_ERR_BUFFER_SLOT = -1121, + + + /********************************************************/ + /* MPIPE Error Codes */ + /********************************************************/ + + + /** Invalid buffer size. */ + GXIO_MPIPE_ERR_INVAL_BUFFER_SIZE = -1131, + + /** Cannot allocate buffer stack. */ + GXIO_MPIPE_ERR_NO_BUFFER_STACK = -1140, + + /** Invalid buffer stack number. */ + GXIO_MPIPE_ERR_BAD_BUFFER_STACK = -1141, + + /** Cannot allocate NotifRing. 
*/ + GXIO_MPIPE_ERR_NO_NOTIF_RING = -1142, + + /** Invalid NotifRing number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_RING = -1143, + + /** Cannot allocate NotifGroup. */ + GXIO_MPIPE_ERR_NO_NOTIF_GROUP = -1144, + + /** Invalid NotifGroup number. */ + GXIO_MPIPE_ERR_BAD_NOTIF_GROUP = -1145, + + /** Cannot allocate bucket. */ + GXIO_MPIPE_ERR_NO_BUCKET = -1146, + + /** Invalid bucket number. */ + GXIO_MPIPE_ERR_BAD_BUCKET = -1147, + + /** Cannot allocate eDMA ring. */ + GXIO_MPIPE_ERR_NO_EDMA_RING = -1148, + + /** Invalid eDMA ring number. */ + GXIO_MPIPE_ERR_BAD_EDMA_RING = -1149, + + /** Invalid channel number. */ + GXIO_MPIPE_ERR_BAD_CHANNEL = -1150, + + /** Bad configuration. */ + GXIO_MPIPE_ERR_BAD_CONFIG = -1151, + + /** Empty iqueue. */ + GXIO_MPIPE_ERR_IQUEUE_EMPTY = -1152, + + /** Empty rules. */ + GXIO_MPIPE_ERR_RULES_EMPTY = -1160, + + /** Full rules. */ + GXIO_MPIPE_ERR_RULES_FULL = -1161, + + /** Corrupt rules. */ + GXIO_MPIPE_ERR_RULES_CORRUPT = -1162, + + /** Invalid rules. */ + GXIO_MPIPE_ERR_RULES_INVALID = -1163, + + /** Classifier is too big. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_BIG = -1170, + + /** Classifier is too complex. */ + GXIO_MPIPE_ERR_CLASSIFIER_TOO_COMPLEX = -1171, + + /** Classifier has bad header. */ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_HEADER = -1172, + + /** Classifier has bad contents. */ + GXIO_MPIPE_ERR_CLASSIFIER_BAD_CONTENTS = -1173, + + /** Classifier encountered invalid symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_SYMBOL = -1174, + + /** Classifier encountered invalid bounds. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_BOUNDS = -1175, + + /** Classifier encountered invalid relocation. */ + GXIO_MPIPE_ERR_CLASSIFIER_INVAL_RELOCATION = -1176, + + /** Classifier encountered undefined symbol. */ + GXIO_MPIPE_ERR_CLASSIFIER_UNDEF_SYMBOL = -1177, + + + /********************************************************/ + /* TRIO Error Codes */ + /********************************************************/ + + /** Cannot allocate memory map region. 
*/ + GXIO_TRIO_ERR_NO_MEMORY_MAP = -1180, + + /** Invalid memory map region number. */ + GXIO_TRIO_ERR_BAD_MEMORY_MAP = -1181, + + /** Cannot allocate scatter queue. */ + GXIO_TRIO_ERR_NO_SCATTER_QUEUE = -1182, + + /** Invalid scatter queue number. */ + GXIO_TRIO_ERR_BAD_SCATTER_QUEUE = -1183, + + /** Cannot allocate push DMA ring. */ + GXIO_TRIO_ERR_NO_PUSH_DMA_RING = -1184, + + /** Invalid push DMA ring index. */ + GXIO_TRIO_ERR_BAD_PUSH_DMA_RING = -1185, + + /** Cannot allocate pull DMA ring. */ + GXIO_TRIO_ERR_NO_PULL_DMA_RING = -1186, + + /** Invalid pull DMA ring index. */ + GXIO_TRIO_ERR_BAD_PULL_DMA_RING = -1187, + + /** Cannot allocate PIO region. */ + GXIO_TRIO_ERR_NO_PIO = -1188, + + /** Invalid PIO region index. */ + GXIO_TRIO_ERR_BAD_PIO = -1189, + + /** Cannot allocate ASID. */ + GXIO_TRIO_ERR_NO_ASID = -1190, + + /** Invalid ASID. */ + GXIO_TRIO_ERR_BAD_ASID = -1191, + + + /********************************************************/ + /* MICA Error Codes */ + /********************************************************/ + + /** No such accelerator type. */ + GXIO_MICA_ERR_BAD_ACCEL_TYPE = -1220, + + /** Cannot allocate context. */ + GXIO_MICA_ERR_NO_CONTEXT = -1221, + + /** PKA command queue is full, can't add another command. */ + GXIO_MICA_ERR_PKA_CMD_QUEUE_FULL = -1222, + + /** PKA result queue is empty, can't get a result from the queue. */ + GXIO_MICA_ERR_PKA_RESULT_QUEUE_EMPTY = -1223, + + /********************************************************/ + /* GPIO Error Codes */ + /********************************************************/ + + /** Pin not available. Either the physical pin does not exist, or + * it is reserved by the hypervisor for system usage. */ + GXIO_GPIO_ERR_PIN_UNAVAILABLE = -1240, + + /** Pin busy. The pin exists, and is available for use via GXIO, but + * it has been attached by some other process or driver. */ + GXIO_GPIO_ERR_PIN_BUSY = -1241, + + /** Cannot access unattached pin. 
One or more of the pins being + * manipulated by this call are not attached to the requesting + * context. */ + GXIO_GPIO_ERR_PIN_UNATTACHED = -1242, + + /** Invalid I/O mode for pin. The wiring of the pin in the system + * is such that the I/O mode or electrical control parameters + * requested could cause damage. */ + GXIO_GPIO_ERR_PIN_INVALID_MODE = -1243, + + /** Smallest iorpc error number. */ + GXIO_ERR_MIN = -1299 +}; + + +#endif /* !_HV_IORPC_H_ */ -- cgit v1.2.3 From 44e56967100f22a21abade38821018ba03d0a39f Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 6 Apr 2012 13:52:07 -0400 Subject: arch/tile: support MMIO-based readb/writeb etc. Add support for MMIO read/write on tilegx to support GXIO IORPC access. Similar to the asm-generic version, but we include memory fences on the writes to be conservative. Signed-off-by: Chris Metcalf --- arch/tile/include/asm/io.h | 144 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 116 insertions(+), 28 deletions(-) diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index d2152deb1f3c..2a9b293fece6 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -62,6 +62,92 @@ extern void iounmap(volatile void __iomem *addr); #define mm_ptov(addr) ((void *)phys_to_virt(addr)) #define mm_vtop(addr) ((unsigned long)virt_to_phys(addr)) +#if CHIP_HAS_MMIO() + +/* + * We use inline assembly to guarantee that the compiler does not + * split an access into multiple byte-sized accesses as it might + * sometimes do if a register data structure is marked "packed". + * Obviously on tile we can't tolerate such an access being + * actually unaligned, but we want to avoid the case where the + * compiler conservatively would generate multiple accesses even + * for an aligned read or write. 
+ */ + +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} + +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + u16 ret; + asm volatile("ld2u %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le16_to_cpu(ret); +} + +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + u32 ret; + /* Sign-extend to conform to u32 ABI sign-extension convention. */ + asm volatile("ld4s %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le32_to_cpu(ret); +} + +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + u64 ret; + asm volatile("ld %0, %1" : "=r" (ret) : "r" (addr)); + barrier(); + return le64_to_cpu(ret); +} + +static inline void __raw_writeb(u8 val, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = val; +} + +static inline void __raw_writew(u16 val, volatile void __iomem *addr) +{ + asm volatile("st2 %0, %1" :: "r" (addr), "r" (cpu_to_le16(val))); +} + +static inline void __raw_writel(u32 val, volatile void __iomem *addr) +{ + asm volatile("st4 %0, %1" :: "r" (addr), "r" (cpu_to_le32(val))); +} + +static inline void __raw_writeq(u64 val, volatile void __iomem *addr) +{ + asm volatile("st %0, %1" :: "r" (addr), "r" (cpu_to_le64(val))); +} + +/* + * The on-chip I/O hardware on tilegx is configured with VA=PA for the + * kernel's PA range. The low-level APIs and field names use "va" and + * "void *" nomenclature, to be consistent with the general notion + * that the addresses in question are virtualizable, but in the kernel + * context we are actually manipulating PA values. (In other contexts, + * e.g. access from user space, we do in fact use real virtual addresses + * in the va fields.) To allow readers of the code to understand what's + * happening, we direct their attention to this comment by using the + * following two functions that just duplicate __va() and __pa(). 
+ */ +typedef unsigned long tile_io_addr_t; +static inline tile_io_addr_t va_to_tile_io_addr(void *va) +{ + BUILD_BUG_ON(sizeof(phys_addr_t) != sizeof(tile_io_addr_t)); + return __pa(va); +} +static inline void *tile_io_addr_to_va(tile_io_addr_t tile_io_addr) +{ + return __va(tile_io_addr); +} + +#else /* CHIP_HAS_MMIO() */ + #ifdef CONFIG_PCI extern u8 _tile_readb(unsigned long addr); @@ -73,10 +159,19 @@ extern void _tile_writew(u16 val, unsigned long addr); extern void _tile_writel(u32 val, unsigned long addr); extern void _tile_writeq(u64 val, unsigned long addr); -#else +#define __raw_readb(addr) _tile_readb((unsigned long)addr) +#define __raw_readw(addr) _tile_readw((unsigned long)addr) +#define __raw_readl(addr) _tile_readl((unsigned long)addr) +#define __raw_readq(addr) _tile_readq((unsigned long)addr) +#define __raw_writeb(val, addr) _tile_writeb(val, (unsigned long)addr) +#define __raw_writew(val, addr) _tile_writew(val, (unsigned long)addr) +#define __raw_writel(val, addr) _tile_writel(val, (unsigned long)addr) +#define __raw_writeq(val, addr) _tile_writeq(val, (unsigned long)addr) + +#else /* CONFIG_PCI */ /* - * The Tile architecture does not support IOMEM unless PCI is enabled. + * The tilepro architecture does not support IOMEM unless PCI is enabled. * Unfortunately we can't yet simply not declare these methods, * since some generic code that compiles into the kernel, but * we never run, uses them unconditionally. 
@@ -88,65 +183,58 @@ static inline int iomem_panic(void) return 0; } -static inline u8 _tile_readb(unsigned long addr) +static inline u8 readb(unsigned long addr) { return iomem_panic(); } -static inline u16 _tile_readw(unsigned long addr) +static inline u16 readw(unsigned long addr) { return iomem_panic(); } -static inline u32 _tile_readl(unsigned long addr) +static inline u32 readl(unsigned long addr) { return iomem_panic(); } -static inline u64 _tile_readq(unsigned long addr) +static inline u64 readq(unsigned long addr) { return iomem_panic(); } -static inline void _tile_writeb(u8 val, unsigned long addr) +static inline void writeb(u8 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writew(u16 val, unsigned long addr) +static inline void writew(u16 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writel(u32 val, unsigned long addr) +static inline void writel(u32 val, unsigned long addr) { iomem_panic(); } -static inline void _tile_writeq(u64 val, unsigned long addr) +static inline void writeq(u64 val, unsigned long addr) { iomem_panic(); } -#endif +#endif /* CONFIG_PCI */ + +#endif /* CHIP_HAS_MMIO() */ -#define readb(addr) _tile_readb((unsigned long)addr) -#define readw(addr) _tile_readw((unsigned long)addr) -#define readl(addr) _tile_readl((unsigned long)addr) -#define readq(addr) _tile_readq((unsigned long)addr) -#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr) -#define writew(val, addr) _tile_writew(val, (unsigned long)addr) -#define writel(val, addr) _tile_writel(val, (unsigned long)addr) -#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr) - -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_readq readq -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel -#define __raw_writeq writeq +#define readb __raw_readb +#define readw __raw_readw +#define readl __raw_readl +#define readq __raw_readq +#define writeb
__raw_writeb +#define writew __raw_writew +#define writel __raw_writel +#define writeq __raw_writeq #define readb_relaxed readb #define readw_relaxed readw -- cgit v1.2.3 From 6369798037c0e915fc3e3844083f2aeecb924c9d Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 4 Apr 2012 16:58:27 -0400 Subject: arch/tile: common DMA code for the GXIO IORPC subsystem The dma_queue support is used by both the mPipe (networking) and Trio (PCI) hardware shims on tilegx. This common code is selected when either of those drivers is built. Signed-off-by: Chris Metcalf --- arch/tile/gxio/Kconfig | 6 ++ arch/tile/gxio/Makefile | 1 + arch/tile/gxio/dma_queue.c | 176 +++++++++++++++++++++++++++++++++++++ arch/tile/include/gxio/dma_queue.h | 161 +++++++++++++++++++++++++++++++++ 4 files changed, 344 insertions(+) create mode 100644 arch/tile/gxio/dma_queue.c create mode 100644 arch/tile/include/gxio/dma_queue.h diff --git a/arch/tile/gxio/Kconfig b/arch/tile/gxio/Kconfig index 8eff47fe1236..ecd076c8cfd5 100644 --- a/arch/tile/gxio/Kconfig +++ b/arch/tile/gxio/Kconfig @@ -3,3 +3,9 @@ config TILE_GXIO bool depends on TILEGX + +# Support direct access to the common I/O DMA facility within the +# TILE-Gx mPIPE and Trio hardware from kernel space. +config TILE_GXIO_DMA + bool + select TILE_GXIO diff --git a/arch/tile/gxio/Makefile b/arch/tile/gxio/Makefile index db1ee2863d8e..97ab468fb8c5 100644 --- a/arch/tile/gxio/Makefile +++ b/arch/tile/gxio/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o +obj-$(CONFIG_TILE_GXIO_DMA) += dma_queue.o diff --git a/arch/tile/gxio/dma_queue.c b/arch/tile/gxio/dma_queue.c new file mode 100644 index 000000000000..baa60357f8ba --- /dev/null +++ b/arch/tile/gxio/dma_queue.c @@ -0,0 +1,176 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +/* Wait for a memory read to complete. */ +#define wait_for_value(val) \ + __asm__ __volatile__("move %0, %0" :: "r"(val)) + +/* The index is in the low 16. */ +#define DMA_QUEUE_INDEX_MASK ((1 << 16) - 1) + +/* + * The hardware descriptor-ring type. + * This matches the types used by mpipe (MPIPE_EDMA_POST_REGION_VAL_t) + * and trio (TRIO_PUSH_DMA_REGION_VAL_t or TRIO_PULL_DMA_REGION_VAL_t). + * See those types for more documentation on the individual fields. + */ +typedef union { + struct { +#ifndef __BIG_ENDIAN__ + uint64_t ring_idx:16; + uint64_t count:16; + uint64_t gen:1; + uint64_t __reserved:31; +#else + uint64_t __reserved:31; + uint64_t gen:1; + uint64_t count:16; + uint64_t ring_idx:16; +#endif + }; + uint64_t word; +} __gxio_ring_t; + +void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, + void *post_region_addr, unsigned int num_entries) +{ + /* + * Limit 65536 entry rings to 65535 credits because we only have a + * 16 bit completion counter. + */ + int64_t credits = (num_entries < 65536) ? 
num_entries : 65535; + + memset(dma_queue, 0, sizeof(*dma_queue)); + + dma_queue->post_region_addr = post_region_addr; + dma_queue->hw_complete_count = 0; + dma_queue->credits_and_next_index = credits << DMA_QUEUE_CREDIT_SHIFT; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_init); + +void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue) +{ + __gxio_ring_t val; + uint64_t count; + uint64_t delta; + uint64_t new_count; + + /* + * Read the 64-bit completion count without touching the cache, so + * we later avoid having to evict any sharers of this cache line + * when we update it below. + */ + uint64_t orig_hw_complete_count = + cmpxchg(&dma_queue->hw_complete_count, + -1, -1); + + /* Make sure the load completes before we access the hardware. */ + wait_for_value(orig_hw_complete_count); + + /* Read the 16-bit count of how many packets it has completed. */ + val.word = __gxio_mmio_read(dma_queue->post_region_addr); + count = val.count; + + /* + * Calculate the number of completions since we last updated the + * 64-bit counter. It's safe to ignore the high bits because the + * maximum credit value is 65535. + */ + delta = (count - orig_hw_complete_count) & 0xffff; + if (delta == 0) + return; + + /* + * Try to write back the count, advanced by delta. If we race with + * another thread, this might fail, in which case we return + * immediately on the assumption that some credits are (or at least + * were) available. + */ + new_count = orig_hw_complete_count + delta; + if (cmpxchg(&dma_queue->hw_complete_count, + orig_hw_complete_count, + new_count) != orig_hw_complete_count) + return; + + /* + * We succeeded in advancing the completion count; add back the + * corresponding number of egress credits. 
+ */ + __insn_fetchadd(&dma_queue->credits_and_next_index, + (delta << DMA_QUEUE_CREDIT_SHIFT)); +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_update_credits); + +/* + * A separate 'blocked' method for put() so that backtraces and + * profiles will clearly indicate that we're wasting time spinning on + * egress availability rather than actually posting commands. + */ +int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, + int64_t modifier) +{ + int backoff = 16; + int64_t old; + + do { + int i; + /* Back off to avoid spamming memory networks. */ + for (i = backoff; i > 0; i--) + __insn_mfspr(SPR_PASS); + + /* Check credits again. */ + __gxio_dma_queue_update_credits(dma_queue); + old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + + /* Calculate bounded exponential backoff for next iteration. */ + if (backoff < 256) + backoff *= 2; + } while (old + modifier < 0); + + return old; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_wait_for_credits); + +int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, + unsigned int num, int wait) +{ + return __gxio_dma_queue_reserve(dma_queue, num, wait != 0, true); +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_reserve_aux); + +int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, + int64_t completion_slot, int update) +{ + if (update) { + if (ACCESS_ONCE(dma_queue->hw_complete_count) > + completion_slot) + return 1; + + __gxio_dma_queue_update_credits(dma_queue); + } + + return ACCESS_ONCE(dma_queue->hw_complete_count) > completion_slot; +} + +EXPORT_SYMBOL_GPL(__gxio_dma_queue_is_complete); diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h new file mode 100644 index 000000000000..00654feb7db0 --- /dev/null +++ b/arch/tile/include/gxio/dma_queue.h @@ -0,0 +1,161 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#ifndef _GXIO_DMA_QUEUE_H_ +#define _GXIO_DMA_QUEUE_H_ + +/* + * DMA queue management APIs shared between TRIO and mPIPE. + */ + +#include "common.h" + +/* The credit counter lives in the high 32 bits. */ +#define DMA_QUEUE_CREDIT_SHIFT 32 + +/* + * State object that tracks a DMA queue's head and tail indices, as + * well as the number of commands posted and completed. The + * structure is accessed via a thread-safe, lock-free algorithm. + */ +typedef struct { + /* + * Address of a MPIPE_EDMA_POST_REGION_VAL_t, + * TRIO_PUSH_DMA_REGION_VAL_t, or TRIO_PULL_DMA_REGION_VAL_t + * register. These registers have identical encodings and provide + * information about how many commands have been processed. + */ + void *post_region_addr; + + /* + * A lazily-updated count of how many edescs the hardware has + * completed. + */ + uint64_t hw_complete_count __attribute__ ((aligned(64))); + + /* + * High 32 bits are a count of available egress command credits, + * low 24 bits are the next egress "slot". + */ + int64_t credits_and_next_index; + +} __gxio_dma_queue_t; + +/* Initialize a dma queue. */ +extern void __gxio_dma_queue_init(__gxio_dma_queue_t *dma_queue, + void *post_region_addr, + unsigned int num_entries); + +/* + * Update the "credits_and_next_index" and "hw_complete_count" fields + * based on pending hardware completions. Note that some other thread + * may have already done this and, importantly, may still be in the + * process of updating "credits_and_next_index".
+ */ +extern void __gxio_dma_queue_update_credits(__gxio_dma_queue_t *dma_queue); + +/* Wait for credits to become available. */ +extern int64_t __gxio_dma_queue_wait_for_credits(__gxio_dma_queue_t *dma_queue, + int64_t modifier); + +/* Reserve slots in the queue, optionally waiting for slots to become + * available, and optionally returning a "completion_slot" suitable for + * direct comparison to "hw_complete_count". + */ +static inline int64_t __gxio_dma_queue_reserve(__gxio_dma_queue_t *dma_queue, + unsigned int num, bool wait, + bool completion) +{ + uint64_t slot; + + /* + * Try to reserve 'num' egress command slots. We do this by + * constructing a constant that subtracts N credits and adds N to + * the index, and using fetchaddgez to only apply it if the credits + * count doesn't go negative. + */ + int64_t modifier = (((int64_t)(-num)) << DMA_QUEUE_CREDIT_SHIFT) | num; + int64_t old = + __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + + if (unlikely(old + modifier < 0)) { + /* + * We're out of credits. Try once to get more by checking for + * completed egress commands. If that fails, wait or fail. + */ + __gxio_dma_queue_update_credits(dma_queue); + old = __insn_fetchaddgez(&dma_queue->credits_and_next_index, + modifier); + if (old + modifier < 0) { + if (wait) + old = __gxio_dma_queue_wait_for_credits + (dma_queue, modifier); + else + return GXIO_ERR_DMA_CREDITS; + } + } + + /* The bottom 24 bits of old encode the "slot". */ + slot = (old & 0xffffff); + + if (completion) { + /* + * A "completion_slot" is a "slot" which can be compared to + * "hw_complete_count" at any time in the future. To convert + * "slot" into a "completion_slot", we access "hw_complete_count" + * once (knowing that we have reserved a slot, and thus, it will + * be "basically" accurate), and combine its high 40 bits with + * the 24 bit "slot", and handle "wrapping" by adding "1 << 24" + * if the result is LESS than "hw_complete_count". 
+ */ + uint64_t complete; + complete = ACCESS_ONCE(dma_queue->hw_complete_count); + slot |= (complete & 0xffffffffff000000); + if (slot < complete) + slot += 0x1000000; + } + + /* + * If any of our slots mod 256 were equivalent to 0, go ahead and + * collect some egress credits, and update "hw_complete_count", and + * make sure the index doesn't overflow into the credits. + */ + if (unlikely(((old + num) & 0xff) < num)) { + __gxio_dma_queue_update_credits(dma_queue); + + /* Make sure the index doesn't overflow into the credits. */ +#ifdef __BIG_ENDIAN__ + *(((uint8_t *)&dma_queue->credits_and_next_index) + 4) = 0; +#else + *(((uint8_t *)&dma_queue->credits_and_next_index) + 3) = 0; +#endif + } + + return slot; +} + +/* Non-inlinable "__gxio_dma_queue_reserve(..., true)". */ +extern int64_t __gxio_dma_queue_reserve_aux(__gxio_dma_queue_t *dma_queue, + unsigned int num, int wait); + +/* Check whether a particular "completion slot" has completed. + * + * Note that this function requires a "completion slot", and thus + * cannot be used with the result of any "reserve_fast" function. + */ +extern int __gxio_dma_queue_is_complete(__gxio_dma_queue_t *dma_queue, + int64_t completion_slot, int update); + +#endif /* !_GXIO_DMA_QUEUE_H_ */ -- cgit v1.2.3 From 4875f69fecab08654972d6fb0d71ee2109d2538c Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 6 Apr 2012 16:38:03 -0400 Subject: arch/tile: provide kernel support for the tilegx mPIPE shim The TILE-Gx chip includes a packet-processing network engine called mPIPE ("Multicore Programmable Intelligent Packet Engine"). This change adds support for using the mPIPE engine from within the kernel. The engine has more functionality than is exposed here, but to keep the kernel code and binary simpler, this is a subset of the full API designed to enable standard Linux networking only. 
Signed-off-by: Chris Metcalf --- arch/tile/gxio/Kconfig | 6 + arch/tile/gxio/Makefile | 1 + arch/tile/gxio/iorpc_mpipe.c | 529 +++++++++ arch/tile/gxio/iorpc_mpipe_info.c | 85 ++ arch/tile/gxio/mpipe.c | 545 +++++++++ arch/tile/include/arch/mpipe.h | 359 ++++++ arch/tile/include/arch/mpipe_constants.h | 42 + arch/tile/include/arch/mpipe_def.h | 39 + arch/tile/include/arch/mpipe_shm.h | 509 +++++++++ arch/tile/include/arch/mpipe_shm_def.h | 23 + arch/tile/include/gxio/iorpc_mpipe.h | 136 +++ arch/tile/include/gxio/iorpc_mpipe_info.h | 46 + arch/tile/include/gxio/mpipe.h | 1736 +++++++++++++++++++++++++++++ arch/tile/include/hv/drv_mpipe_intf.h | 602 ++++++++++ 14 files changed, 4658 insertions(+) create mode 100644 arch/tile/gxio/iorpc_mpipe.c create mode 100644 arch/tile/gxio/iorpc_mpipe_info.c create mode 100644 arch/tile/gxio/mpipe.c create mode 100644 arch/tile/include/arch/mpipe.h create mode 100644 arch/tile/include/arch/mpipe_constants.h create mode 100644 arch/tile/include/arch/mpipe_def.h create mode 100644 arch/tile/include/arch/mpipe_shm.h create mode 100644 arch/tile/include/arch/mpipe_shm_def.h create mode 100644 arch/tile/include/gxio/iorpc_mpipe.h create mode 100644 arch/tile/include/gxio/iorpc_mpipe_info.h create mode 100644 arch/tile/include/gxio/mpipe.h create mode 100644 arch/tile/include/hv/drv_mpipe_intf.h diff --git a/arch/tile/gxio/Kconfig b/arch/tile/gxio/Kconfig index ecd076c8cfd5..8aeebb70a3df 100644 --- a/arch/tile/gxio/Kconfig +++ b/arch/tile/gxio/Kconfig @@ -9,3 +9,9 @@ config TILE_GXIO config TILE_GXIO_DMA bool select TILE_GXIO + +# Support direct access to the TILE-Gx mPIPE hardware from kernel space. 
+config TILE_GXIO_MPIPE + bool + select TILE_GXIO + select TILE_GXIO_DMA diff --git a/arch/tile/gxio/Makefile b/arch/tile/gxio/Makefile index 97ab468fb8c5..130eec48c152 100644 --- a/arch/tile/gxio/Makefile +++ b/arch/tile/gxio/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o obj-$(CONFIG_TILE_GXIO_DMA) += dma_queue.o +obj-$(CONFIG_TILE_GXIO_MPIPE) += mpipe.o iorpc_mpipe.o iorpc_mpipe_info.o diff --git a/arch/tile/gxio/iorpc_mpipe.c b/arch/tile/gxio/iorpc_mpipe.c new file mode 100644 index 000000000000..31b87bf8c027 --- /dev/null +++ b/arch/tile/gxio/iorpc_mpipe.c @@ -0,0 +1,529 @@ +/* + * Copyright 2012 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/* This file is machine-generated; DO NOT EDIT! 
*/ +#include "gxio/iorpc_mpipe.h" + +struct alloc_buffer_stacks_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_buffer_stacks_param temp; + struct alloc_buffer_stacks_param *params = &temp; + + params->count = count; + params->first = first; + params->flags = flags; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_ALLOC_BUFFER_STACKS); +} + +EXPORT_SYMBOL(gxio_mpipe_alloc_buffer_stacks); + +struct init_buffer_stack_aux_param { + union iorpc_mem_buffer buffer; + unsigned int stack; + unsigned int buffer_size_enum; +}; + +int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context, + void *mem_va, size_t mem_size, + unsigned int mem_flags, unsigned int stack, + unsigned int buffer_size_enum) +{ + int __result; + unsigned long long __cpa; + pte_t __pte; + struct init_buffer_stack_aux_param temp; + struct init_buffer_stack_aux_param *params = &temp; + + __result = va_to_cpa_and_pte(mem_va, &__cpa, &__pte); + if (__result != 0) + return __result; + params->buffer.kernel.cpa = __cpa; + params->buffer.kernel.size = mem_size; + params->buffer.kernel.pte = __pte; + params->buffer.kernel.flags = mem_flags; + params->stack = stack; + params->buffer_size_enum = buffer_size_enum; + + return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params, + sizeof(*params), + GXIO_MPIPE_OP_INIT_BUFFER_STACK_AUX); +} + +EXPORT_SYMBOL(gxio_mpipe_init_buffer_stack_aux); + + +struct alloc_notif_rings_param { + unsigned int count; + unsigned int first; + unsigned int flags; +}; + +int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context, + unsigned int count, unsigned int first, + unsigned int flags) +{ + struct alloc_notif_rings_param temp; + struct alloc_notif_rings_param *params = &temp; + + params->count = count; + params->first = first; + param