diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 14:57:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 14:57:10 -0800 |
commit | 5d24ae67a961c51beb255a28c9c417d9710247c2 (patch) | |
tree | c23c71b2f17f4502554c80b84be476e4c08f7160 /drivers/infiniband/hw/hfi1 | |
parent | 938edb8a31b976c9a92eb0cd4ff481e93f76c1f1 (diff) | |
parent | f617e5ffe04fd46010b618c9eeadaa04588704c9 (diff) | |
download | linux-5d24ae67a961c51beb255a28c9c417d9710247c2.tar.gz linux-5d24ae67a961c51beb255a28c9c417d9710247c2.tar.bz2 linux-5d24ae67a961c51beb255a28c9c417d9710247c2.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a fairly typical cycle, with the usual sorts of driver
updates. Several series continue to come through which improve and
modernize various parts of the core code, and we finally are starting
to get the uAPI command interface cleaned up.
- Various driver fixes for bnxt_re, cxgb3/4, hfi1, hns, i40iw, mlx4,
mlx5, qib, rxe, usnic
- Rework the entire syscall flow for uverbs to be able to run over
ioctl(). Finally getting past the historic bad choice to use
write() for command execution
- More functional coverage with the mlx5 'devx' user API
- Start of the HFI1 series for 'TID RDMA'
- SRQ support in the hns driver
- Support for new IBTA defined 2x lane widths
- A big series to consolidate all the driver function pointers into a
big struct and have drivers provide a 'static const' version of the
struct instead of open coding initialization
- New 'advise_mr' uAPI to control device caching/loading of page
tables
- Support for inline data in SRPT
- Modernize how umad uses the driver core and creates cdev's and
sysfs files
- First steps toward removing 'uobject' from the view of the drivers"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (193 commits)
RDMA/srpt: Use kmem_cache_free() instead of kfree()
RDMA/mlx5: Signedness bug in UVERBS_HANDLER()
IB/uverbs: Signedness bug in UVERBS_HANDLER()
IB/mlx5: Allocate the per-port Q counter shared when DEVX is supported
IB/umad: Start using dev_groups of class
IB/umad: Use class_groups and let core create class file
IB/umad: Refactor code to use cdev_device_add()
IB/umad: Avoid destroying device while it is accessed
IB/umad: Simplify and avoid dynamic allocation of class
IB/mlx5: Fix wrong error unwind
IB/mlx4: Remove set but not used variable 'pd'
RDMA/iwcm: Don't copy past the end of dev_name() string
IB/mlx5: Fix long EEH recover time with NVMe offloads
IB/mlx5: Simplify netdev unbinding
IB/core: Move query port to ioctl
RDMA/nldev: Expose port_cap_flags2
IB/core: uverbs copy to struct or zero helper
IB/rxe: Reuse code which sets port state
IB/rxe: Make counters thread safe
IB/mlx5: Use the correct commands for UMEM and UCTX allocation
...
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
23 files changed, 356 insertions, 125 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ff790390c91a..3ce9dc8c3463 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -34,6 +34,7 @@ hfi1-y := \ ruc.o \ sdma.o \ sysfs.o \ + tid_rdma.o \ trace.o \ uc.o \ ud.o \ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 7e6d70936c63..b443642eac02 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; ppd->port_error_action = 0; - ppd->host_link_state = HLS_DN_POLL; if (quick_linkup) { /* quick linkup does not go into polling */ ret = do_quick_linkup(dd); } else { ret1 = set_physical_link_state(dd, PLS_POLLING); + if (!ret1) + ret1 = wait_phys_link_out_of_offline(ppd, + 3000); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, "Failed to transition to Polling link state, return 0x%x\n", @@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } + + /* + * Change the host link state after requesting DC8051 to + * change its physical state so that we can ignore any + * interrupt with stale LNI(XX) error, which will not be + * cleared until DC8051 transitions to Polling state. + */ + ppd->host_link_state = HLS_DN_POLL; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* @@ -12928,6 +12940,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, return read_state; } +/* + * wait_phys_link_out_of_offline - wait for any out of offline state + * @ppd: port device + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for any out of offline physical link + * state change to occur. + * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT. + */ +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs) +{ + u32 read_state; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + read_state = read_physical_state(ppd->dd); + if ((read_state & 0xF0) != PLS_OFFLINE) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n", + read_state, msecs); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ + } + + log_state_transition(ppd, read_state); + return read_state; +} + #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c6163a347e93..c0800ea5a3f8 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -935,6 +935,10 @@ #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018) +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32 +#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull #define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028) #define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull #define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7108d4d92259..40d3cfb58bd1 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -136,18 +136,21 @@ HFI1_CAP_ALLOW_PERM_JKEY | \ HFI1_CAP_STATIC_RATE_CTRL | \ HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_TID_UNMAP) + HFI1_CAP_TID_UNMAP | \ + HFI1_CAP_OPFN) /* * A set of capability bits that are "global" and are not allowed to be * set in the user bitmask. */ #define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \ - HFI1_CAP_USE_SDMA_HEAD | \ - HFI1_CAP_EXTENDED_PSN | \ - HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_NO_INTEGRITY | \ - HFI1_CAP_PKEY_CHECK) << \ - HFI1_CAP_USER_SHIFT) + HFI1_CAP_USE_SDMA_HEAD | \ + HFI1_CAP_EXTENDED_PSN | \ + HFI1_CAP_PRINT_UNIMPL | \ + HFI1_CAP_NO_INTEGRITY | \ + HFI1_CAP_PKEY_CHECK | \ + HFI1_CAP_TID_RDMA | \ + HFI1_CAP_OPFN) << \ + HFI1_CAP_USER_SHIFT) /* * Set of capabilities that need to be enabled for kernel context in * order to be allowed for user contexts, as well. diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 9f992ae36c89..0a557795563c 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds); DEBUGFS_SEQ_FILE_OPEN(rcds) DEBUGFS_FILE_OPS(rcds); +static void *_pios_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_ibdev *ibd; + struct hfi1_devdata *dd; + + ibd = (struct hfi1_ibdev *)s->private; + dd = dd_from_dev(ibd); + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + ++*pos; + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void _pios_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _pios_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct send_context_info *sci; + loff_t *spos = v; + loff_t i = *spos; + unsigned long flags; + + spin_lock_irqsave(&dd->sc_lock, flags); + sci = &dd->send_contexts[i]; + if (sci && sci->type != SC_USER && sci->allocated && sci->sc) + seqfile_dump_sci(s, i, sci); + spin_unlock_irqrestore(&dd->sc_lock, flags); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(pios); +DEBUGFS_SEQ_FILE_OPEN(pios) +DEBUGFS_FILE_OPS(pios); + /* read the per-device counters */ static ssize_t dev_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a41f85558312..a8ad70730203 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { [HFI1_PKT_TYPE_16B] = &return_cnp_16B }; -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_process_ecn_slowpath - Process FECN or BECN bits + * @qp: The packet's destination QP + * @pkt: The packet itself. + * @prescan: Is the caller the RXQ prescan + * + * Process the packet's FECN or BECN bits. By now, the packet + * has already been evaluated whether processing of those bit should + * be done. + * The significance of the @prescan argument is that if the caller + * is the RXQ prescan, a CNP will be send out instead of waiting for the + * normal packet processing to send an ACK with BECN set (or a CNP). + */ +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; - u32 rqpn = 0, bth1; + u32 rqpn = 0; u16 pkey; u32 rlid, slid, dlid = 0; - u8 hdr_type, sc, svc_type; - bool is_mcast = false; + u8 hdr_type, sc, svc_type, opcode; + bool is_mcast = false, ignore_fecn = false, do_cnp = false, + fecn, becn; /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { - is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); dlid = hfi1_16B_get_dlid(pkt->hdr); slid = hfi1_16B_get_slid(pkt->hdr); + is_mcast = hfi1_is_16B_mcast(dlid); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_16B; + fecn = hfi1_16B_get_fecn(pkt->hdr); + becn = hfi1_16B_get_becn(pkt->hdr); } else { - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); - dlid = ib_get_dlid(pkt->hdr); + dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) : + ppd->lid; slid = ib_get_slid(pkt->hdr); + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_9B; + fecn = ib_bth_get_fecn(ohdr); + becn = ib_bth_get_becn(ohdr); } switch (qp->ibqp.qp_type) { case IB_QPT_UD: - dlid = ppd->lid; rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; @@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, svc_type = IB_CC_SVCTYPE_RC; break; default: - return; + return false; } - bth1 = be32_to_cpu(ohdr->bth[1]); + ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) || + (opcode == IB_OPCODE_RC_ACKNOWLEDGE); + /* + * ACKNOWLEDGE packets do not get a CNP but this will be + * guarded by ignore_fecn above. + */ + do_cnp = prescan || + (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + /* Call appropriate CNP handler */ - if (do_cnp && (bth1 & IB_FECN_SMASK)) + if (!ignore_fecn && do_cnp && fecn) hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); - if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - u32 lqpn = bth1 & RVT_QPN_MASK; + if (becn) { + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); } - + return !ignore_fecn && fecn; } struct ps_mdata { @@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; - int is_ecn = 0; u8 lnh; if (ps_done(&mdata, rhf, rcd)) @@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; /* just in case */ } - bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); - - if (!is_ecn) + if (!hfi1_may_ecn(packet)) goto next; + bth1 = be32_to_cpu(packet->ohdr->bth[1]); qpn = bth1 & RVT_QPN_MASK; rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); @@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; } - process_ecn(qp, packet, true); + hfi1_process_ecn_slowpath(qp, packet, true); rcu_read_unlock(); /* turn off BECN, FECN */ @@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet) if ((!(hfi1_is_16B_mcast(packet->dlid))) && (packet->dlid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) { - if (packet->dlid != ppd->lid) + if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid) return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2b882347d0c2..6db2276f5c13 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1804,13 +1804,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd) return &rcd->ppd->ibport_data; } -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp); -static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_may_ecn - Check whether FECN or BECN processing should be done + * @pkt: the packet to be evaluated + * + * Check whether the FECN or BECN bits in the packet's header are + * enabled, depending on packet type. + * + * This function only checks for FECN and BECN bits. Additional checks + * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to + * ensure correct handling. + */ +static inline bool hfi1_may_ecn(struct hfi1_packet *pkt) { - bool becn; - bool fecn; + bool fecn, becn; if (pkt->etype == RHF_RCV_TYPE_BYPASS) { fecn = hfi1_16B_get_fecn(pkt->hdr); @@ -1819,10 +1826,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, fecn = ib_bth_get_fecn(pkt->ohdr); becn = ib_bth_get_becn(pkt->ohdr); } - if (unlikely(fecn || becn)) { - hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return fecn; - } + return fecn || becn; +} + +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan); +static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt) +{ + bool do_work; + + do_work = hfi1_may_ecn(pkt); + if (unlikely(do_work)) + return hfi1_process_ecn_slowpath(qp, pkt, false); return false; } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 88a0cf930136..4228393e6c4c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -305,7 +305,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9ab50d2308dc..dd5a5c030066 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, spin_lock_init(&sc->alloc_lock); spin_lock_init(&sc->release_lock); spin_lock_init(&sc->credit_ctrl_lock); + seqlock_init(&sc->waitlock); INIT_LIST_HEAD(&sc->piowait); INIT_WORK(&sc->halt_work, sc_halted); init_waitqueue_head(&sc->halt_wait); @@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) static void sc_piobufavail(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; struct list_head *list; struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; struct rvt_qp *qp; @@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc) * could end up with QPs on the wait list with the interrupt * disabled. */ - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&sc->waitlock, flags); while (!list_empty(list)) { struct iowait *wait; @@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc) if (!list_empty(list)) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&sc->waitlock, flags); /* Wake up the most starved one first */ if (n) @@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd) kfree(dd->cr_base); dd->cr_base = NULL; } + +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci) +{ + struct send_context *sc = sci->sc; + u64 reg; + + seq_printf(s, "SCI %u: type %u base %u credits %u\n", + i, sci->type, sci->base, sci->credits); + seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", + sc->flags, sc->sw_index, sc->hw_context, sc->group); + seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", + sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); + seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", + sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); + seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", + sc->credit_intr_count, sc->credit_ctrl); + reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); + seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n", + (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> + CR_COUNTER_SHIFT, + (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & + SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), + reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); +} diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index aaf372c3e5d6..c9a58b642bdd 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -127,6 +127,8 @@ struct send_context { volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; + seqlock_t waitlock; + spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; u32 credit_intr_count; /* count of credit intr users */ u64 credit_ctrl; /* cache for credit control */ @@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes); void seg_pio_copy_end(struct pio_buf *pbuf); +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci); + #endif /* _PIO_H */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a016248039f..5344e8993b28 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -375,20 +375,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) static void qp_pio_drain(struct rvt_qp *qp) { - struct hfi1_ibdev *dev; struct hfi1_qp_priv *priv = qp->priv; if (!priv->s_sendcontext) return; - dev = to_idev(qp->ibqp.device); while (iowait_pio_pending(&priv->s_iowait)) { - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); iowait_pio_drain(&priv->s_iowait); - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); } } @@ -459,7 +457,6 @@ static int iowait_sleep( struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; - struct hfi1_ibdev *dev; qp = tx->qp; priv = qp->priv; @@ -472,9 +469,8 @@ static int iowait_sleep( * buffer and undoing the side effects of the copy. */ /* Make a common routine? */ - dev = &sde->dd->verbs_dev; list_add_tail(&stx->list, &wait->tx_head); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (sdma_progress(sde, seq, stx)) goto eagain; if (list_empty(&priv->s_iowait.list)) { @@ -485,11 +481,11 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; iowait_queue(pkts_sent, &priv->s_iowait, &sde->dmawait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sde->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); hfi1_qp_unbusy(qp, wait); spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EBUSY; @@ -499,7 +495,7 @@ static int iowait_sleep( } return ret; eagain: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); spin_unlock_irqrestore(&qp->s_lock, flags); list_del_init(&stx->list); return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 188aa4f686a0..be603f35d7e4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; rvt_put_swqe(wqe); + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct ib_reth *reth; unsigned long flags; int ret; - bool is_fecn = false; - bool copy_last = false; + bool copy_last = false, fecn; u32 rkey; u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); @@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = process_ecn(qp, packet, false); + fecn = process_ecn(qp, packet); /* * Process responses (ACKs) before anything else. Note that the @@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { rc_rcv_resp(packet); - if (is_fecn) - goto send_ack; return; } @@ -2347,11 +2346,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2413,11 +2412,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2430,16 +2429,9 @@ send_last: qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. */ - if (psn & IB_BTH_REQ_ACK) { - if (packet->numpkt == 0) { - rc_cancel_ack(qp); - goto send_ack; - } - if (qp->r_adefered >= HFI1_PSN_CREDIT) { - rc_cancel_ack(qp); - goto send_ack; - } - if (unlikely(is_fecn)) { + if (psn & IB_BTH_REQ_ACK || fecn) { + if (packet->numpkt == 0 || fecn || + qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2480,7 +2472,7 @@ nack_acc: qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: - hfi1_send_rc_ack(packet, is_fecn); + hfi1_send_rc_ack(packet, fecn); } void hfi1_rc_hdrerr( diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 891d2386d1ca..b84356e1a4c1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) seqlock_init(&sde->head_lock); spin_lock_init(&sde->senddmactrl_lock); spin_lock_init(&sde->flushlist_lock); + seqlock_init(&sde->waitlock); /* insure there is always a zero bit */ sde->ahg_bits = 0xfffffffe00000000ULL; @@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; uint i, n = 0, seq, max_idx = 0; - struct hfi1_ibdev *dev = &sde->dd->verbs_dev; u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY @@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) #endif do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&sde->waitlock); if (!list_empty(&sde->dmawait)) { /* at least one item */ - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); /* Harvest waiters wanting DMA descriptors */ list_for_each_entry_safe( wait, @@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) list_del_init(&wait->list); waits[n++] = wait; } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); |