diff options
| author | Jason Gunthorpe <jgg@mellanox.com> | 2018-08-16 14:13:03 -0600 |
|---|---|---|
| committer | Jason Gunthorpe <jgg@mellanox.com> | 2018-08-16 14:21:29 -0600 |
| commit | 0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (patch) | |
| tree | d6c0bc84863cca54dfbde3b7463e5d49c82af9f1 /net/rds | |
| parent | 92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 (diff) | |
| parent | 5c60a7389d795e001c8748b458eb76e3a5b6008c (diff) | |
| download | linux-0a3173a5f09bc58a3638ecfd0a80bdbae55e123c.tar.gz linux-0a3173a5f09bc58a3638ecfd0a80bdbae55e123c.tar.bz2 linux-0a3173a5f09bc58a3638ecfd0a80bdbae55e123c.zip | |
Merge branch 'linus/master' into rdma.git for-next

rdma.git merge resolution for the 4.19 merge window

Conflicts:
  drivers/infiniband/core/rdma_core.c
    - Use the rdma code and revise with the new spelling for atomic_fetch_add_unless
  drivers/nvme/host/rdma.c
    - Replace max_sge with max_send_sge in new blk code
  drivers/nvme/target/rdma.c
    - Use the blk code and revise to use NULL for ib_post_recv when appropriate
    - Replace max_sge with max_recv_sge in new blk code
  net/rds/ib_send.c
    - Use the net code and revise to use NULL for ib_post_recv when appropriate

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'net/rds')
| -rw-r--r-- | net/rds/Kconfig | 2 | ||||
| -rw-r--r-- | net/rds/Makefile | 1 | ||||
| -rw-r--r-- | net/rds/af_rds.c | 205 | ||||
| -rw-r--r-- | net/rds/bind.c | 138 | ||||
| -rw-r--r-- | net/rds/cong.c | 23 | ||||
| -rw-r--r-- | net/rds/connection.c | 283 | ||||
| -rw-r--r-- | net/rds/ib.c | 136 | ||||
| -rw-r--r-- | net/rds/ib.h | 53 | ||||
| -rw-r--r-- | net/rds/ib_cm.c | 320 | ||||
| -rw-r--r-- | net/rds/ib_frmr.c | 1 | ||||
| -rw-r--r-- | net/rds/ib_mr.h | 2 | ||||
| -rw-r--r-- | net/rds/ib_rdma.c | 26 | ||||
| -rw-r--r-- | net/rds/ib_recv.c | 33 | ||||
| -rw-r--r-- | net/rds/ib_send.c | 13 | ||||
| -rw-r--r-- | net/rds/loop.c | 7 | ||||
| -rw-r--r-- | net/rds/message.c | 1 | ||||
| -rw-r--r-- | net/rds/rdma.c | 6 | ||||
| -rw-r--r-- | net/rds/rdma_transport.c | 95 | ||||
| -rw-r--r-- | net/rds/rdma_transport.h | 5 | ||||
| -rw-r--r-- | net/rds/rds.h | 88 | ||||
| -rw-r--r-- | net/rds/recv.c | 78 | ||||
| -rw-r--r-- | net/rds/send.c | 116 | ||||
| -rw-r--r-- | net/rds/tcp.c | 154 | ||||
| -rw-r--r-- | net/rds/tcp.h | 2 | ||||
| -rw-r--r-- | net/rds/tcp_connect.c | 68 | ||||
| -rw-r--r-- | net/rds/tcp_listen.c | 87 | ||||
| -rw-r--r-- | net/rds/tcp_recv.c | 9 | ||||
| -rw-r--r-- | net/rds/tcp_send.c | 4 | ||||
| -rw-r--r-- | net/rds/threads.c | 69 | ||||
| -rw-r--r-- | net/rds/transport.c | 16 |
30 files changed, 1600 insertions, 441 deletions
diff --git a/net/rds/Kconfig b/net/rds/Kconfig index bffde4b46c5d..01b3bd6a3708 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -16,6 +16,7 @@ config RDS_RDMA config RDS_TCP tristate "RDS over TCP" depends on RDS + depends on IPV6 || !IPV6 ---help--- Allow RDS to use TCP as a transport. This transport does not support RDMA operations. @@ -24,4 +25,3 @@ config RDS_DEBUG bool "RDS debugging messages" depends on RDS default n - diff --git a/net/rds/Makefile b/net/rds/Makefile index b5d568bd479c..e647f9de104a 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -15,4 +15,3 @@ rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ tcp_send.o tcp_stats.o ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG - diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index ab751a150f70..65387e1e6964 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -35,6 +35,7 @@ #include <linux/kernel.h> #include <linux/gfp.h> #include <linux/in.h> +#include <linux/ipv6.h> #include <linux/poll.h> #include <net/sock.h> @@ -113,26 +114,82 @@ void rds_wake_sk_sleep(struct rds_sock *rs) static int rds_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; struct rds_sock *rs = rds_sk_to_rs(sock->sk); - - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + int uaddr_len; /* racey, don't care */ if (peer) { - if (!rs->rs_conn_addr) + if (ipv6_addr_any(&rs->rs_conn_addr)) return -ENOTCONN; - sin->sin_port = rs->rs_conn_port; - sin->sin_addr.s_addr = rs->rs_conn_addr; + if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + sin->sin_family = AF_INET; + sin->sin_port = rs->rs_conn_port; + sin->sin_addr.s_addr = rs->rs_conn_addr_v4; + uaddr_len = sizeof(*sin); + } else { + sin6 = (struct sockaddr_in6 *)uaddr; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = rs->rs_conn_port; + sin6->sin6_addr = rs->rs_conn_addr; + sin6->sin6_flowinfo = 0; + /* scope_id is the same as in the bound address. */ + sin6->sin6_scope_id = rs->rs_bound_scope_id; + uaddr_len = sizeof(*sin6); + } } else { - sin->sin_port = rs->rs_bound_port; - sin->sin_addr.s_addr = rs->rs_bound_addr; + /* If socket is not yet bound and the socket is connected, + * set the return address family to be the same as the + * connected address, but with 0 address value. If it is not + * connected, set the family to be AF_UNSPEC (value 0) and + * the address size to be that of an IPv4 address. 
+ */ + if (ipv6_addr_any(&rs->rs_bound_addr)) { + if (ipv6_addr_any(&rs->rs_conn_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_UNSPEC; + return sizeof(*sin); + } + +#if IS_ENABLED(CONFIG_IPV6) + if (!(ipv6_addr_type(&rs->rs_conn_addr) & + IPV6_ADDR_MAPPED)) { + sin6 = (struct sockaddr_in6 *)uaddr; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + return sizeof(*sin6); + } +#endif + + sin = (struct sockaddr_in *)uaddr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + return sizeof(*sin); + } + if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { + sin = (struct sockaddr_in *)uaddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + sin->sin_family = AF_INET; + sin->sin_port = rs->rs_bound_port; + sin->sin_addr.s_addr = rs->rs_bound_addr_v4; + uaddr_len = sizeof(*sin); + } else { + sin6 = (struct sockaddr_in6 *)uaddr; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = rs->rs_bound_port; + sin6->sin6_addr = rs->rs_bound_addr; + sin6->sin6_flowinfo = 0; + sin6->sin6_scope_id = rs->rs_bound_scope_id; + uaddr_len = sizeof(*sin6); + } } - sin->sin_family = AF_INET; - - return sizeof(*sin); + return uaddr_len; } /* @@ -203,11 +260,12 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, int len) { + struct sockaddr_in6 sin6; struct sockaddr_in sin; int ret = 0; /* racing with another thread binding seems ok here */ - if (rs->rs_bound_addr == 0) { + if (ipv6_addr_any(&rs->rs_bound_addr)) { ret = -ENOTCONN; /* XXX not a great errno */ goto out; } @@ -215,14 +273,23 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, if (len < sizeof(struct sockaddr_in)) { ret = -EINVAL; goto out; + } else if (len < sizeof(struct sockaddr_in6)) { + /* Assume IPv4 */ + if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) { + ret = -EFAULT; + goto out; + } + 
ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); + sin6.sin6_port = sin.sin_port; + } else { + if (copy_from_user(&sin6, optval, + sizeof(struct sockaddr_in6))) { + ret = -EFAULT; + goto out; + } } - if (copy_from_user(&sin, optval, sizeof(sin))) { - ret = -EFAULT; - goto out; - } - - rds_send_drop_to(rs, &sin); + rds_send_drop_to(rs, &sin6); out: return ret; } @@ -435,31 +502,91 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + struct sockaddr_in *sin; struct rds_sock *rs = rds_sk_to_rs(sk); int ret = 0; lock_sock(sk); - if (addr_len != sizeof(struct sockaddr_in)) { - ret = -EINVAL; - goto out; - } + switch (uaddr->sa_family) { + case AF_INET: + sin = (struct sockaddr_in *)uaddr; + if (addr_len < sizeof(struct sockaddr_in)) { + ret = -EINVAL; + break; + } + if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { + ret = -EDESTADDRREQ; + break; + } + if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || + sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { + ret = -EINVAL; + break; + } + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr); + rs->rs_conn_port = sin->sin_port; + break; - if (sin->sin_family != AF_INET) { - ret = -EAFNOSUPPORT; - goto out; - } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { + struct sockaddr_in6 *sin6; + int addr_type; - if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { - ret = -EDESTADDRREQ; - goto out; + sin6 = (struct sockaddr_in6 *)uaddr; + if (addr_len < sizeof(struct sockaddr_in6)) { + ret = -EINVAL; + break; + } + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + + if (!(addr_type & IPV6_ADDR_MAPPED)) { + ret = -EPROTOTYPE; + break; + } + + /* It is a mapped address. Need to do some sanity + * checks. 
+ */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) { + ret = -EPROTOTYPE; + break; + } + } + + if (addr_type & IPV6_ADDR_LINKLOCAL) { + /* If socket is arleady bound to a link local address, + * the peer address must be on the same link. + */ + if (sin6->sin6_scope_id == 0 || + (!ipv6_addr_any(&rs->rs_bound_addr) && + rs->rs_bound_scope_id && + sin6->sin6_scope_id != rs->rs_bound_scope_id)) { + ret = -EINVAL; + break; + } + /* Remember the connected address scope ID. It will + * be checked against the binding local address when + * the socket is bound. + */ + rs->rs_bound_scope_id = sin6->sin6_scope_id; + } + rs->rs_conn_addr = sin6->sin6_addr; + rs->rs_conn_port = sin6->sin6_port; + break; } +#endif - rs->rs_conn_addr = sin->sin_addr.s_addr; - rs->rs_conn_port = sin->sin_port; + default: + ret = -EAFNOSUPPORT; + break; + } -out: release_sock(sk); return ret; } @@ -578,8 +705,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { total++; if (total <= len) - rds_inc_info_copy(inc, iter, inc->i_saddr, - rs->rs_bound_addr, 1); + rds_inc_info_copy(inc, iter, + inc->i_saddr.s6_addr32[3], + rs->rs_bound_addr_v4, + 1); } read_unlock(&rs->rs_recv_lock); @@ -608,8 +737,8 @@ static void rds_sock_info(struct socket *sock, unsigned int len, list_for_each_entry(rs, &rds_sock_list, rs_item) { sinfo.sndbuf = rds_sk_sndbuf(rs); sinfo.rcvbuf = rds_sk_rcvbuf(rs); - sinfo.bound_addr = rs->rs_bound_addr; - sinfo.connected_addr = rs->rs_conn_addr; + sinfo.bound_addr = rs->rs_bound_addr_v4; + sinfo.connected_addr = rs->rs_conn_addr_v4; sinfo.bound_port = rs->rs_bound_port; sinfo.connected_port = rs->rs_conn_port; sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); diff --git a/net/rds/bind.c b/net/rds/bind.c index 5aa3a64aa4f0..3ab55784b637 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <net/sock.h> #include <linux/in.h> +#include <linux/ipv6.h> #include <linux/if_arp.h> #include <linux/jhash.h> #include <linux/ratelimit.h> @@ -42,42 +43,58 @@ static struct rhashtable bind_hash_table; static const struct rhashtable_params ht_parms = { .nelem_hint = 768, - .key_len = sizeof(u64), + .key_len = RDS_BOUND_KEY_LEN, .key_offset = offsetof(struct rds_sock, rs_bound_key), .head_offset = offsetof(struct rds_sock, rs_bound_node), .max_size = 16384, .min_size = 1024, }; +/* Create a key for the bind hash table manipulation. Port is in network byte + * order. + */ +static inline void __rds_create_bind_key(u8 *key, const struct in6_addr *addr, + __be16 port, __u32 scope_id) +{ + memcpy(key, addr, sizeof(*addr)); + key += sizeof(*addr); + memcpy(key, &port, sizeof(port)); + key += sizeof(port); + memcpy(key, &scope_id, sizeof(scope_id)); +} + /* * Return the rds_sock bound at the given local address. * * The rx path can race with rds_release. We notice if rds_release() has * marked this socket and don't return a rs ref to the rx path. 
*/ -struct rds_sock *rds_find_bound(__be32 addr, __be16 port) +struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, + __u32 scope_id) { - u64 key = ((u64)addr << 32) | port; + u8 key[RDS_BOUND_KEY_LEN]; struct rds_sock *rs; - rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms); + __rds_create_bind_key(key, addr, port, scope_id); + rs = rhashtable_lookup_fast(&bind_hash_table, key, ht_parms); if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) rds_sock_addref(rs); else rs = NULL; - rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, - ntohs(port)); + rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, + ntohs(port)); return rs; } /* returns -ve errno or +ve port */ -static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) +static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr, + __be16 *port, __u32 scope_id) { int ret = -EADDRINUSE; u16 rover, last; - u64 key; + u8 key[RDS_BOUND_KEY_LEN]; if (*port != 0) { rover = be16_to_cpu(*port); @@ -95,12 +112,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) if (rover == RDS_FLAG_PROBE_PORT) continue; - key = ((u64)addr << 32) | cpu_to_be16(rover); - if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms)) + __rds_create_bind_key(key, addr, cpu_to_be16(rover), + scope_id); + if (rhashtable_lookup_fast(&bind_hash_table, key, ht_parms)) continue; - rs->rs_bound_key = key; - rs->rs_bound_addr = addr; + memcpy(rs->rs_bound_key, key, sizeof(rs->rs_bound_key)); + rs->rs_bound_addr = *addr; net_get_random_once(&rs->rs_hash_initval, sizeof(rs->rs_hash_initval)); rs->rs_bound_port = cpu_to_be16(rover); @@ -109,12 +127,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) if (!rhashtable_insert_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms)) { *port = rs->rs_bound_port; + rs->rs_bound_scope_id = scope_id; ret = 0; - rdsdebug("rs %p binding to %pI4:%d\n", - rs, &addr, (int)ntohs(*port)); + rdsdebug("rs %p 
binding to %pI6c:%d\n", + rs, addr, (int)ntohs(*port)); break; } else { - rs->rs_bound_addr = 0; + rs->rs_bound_addr = in6addr_any; rds_sock_put(rs); ret = -ENOMEM; break; @@ -127,44 +146,103 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) void rds_remove_bound(struct rds_sock *rs) { - if (!rs->rs_bound_addr) + if (ipv6_addr_any(&rs->rs_bound_addr)) return; - rdsdebug("rs %p unbinding from %pI4:%d\n", + rdsdebug("rs %p unbinding from %pI6c:%d\n", rs, &rs->rs_bound_addr, ntohs(rs->rs_bound_port)); rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms); rds_sock_put(rs); - rs->rs_bound_addr = 0; + rs->rs_bound_addr = in6addr_any; } int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; struct rds_sock *rs = rds_sk_to_rs(sk); + struct in6_addr v6addr, *binding_addr; struct rds_transport *trans; + __u32 scope_id = 0; int ret = 0; + __be16 port; + + /* We allow an RDS socket to be bound to either IPv4 or IPv6 + * address. + */ + if (uaddr->sa_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + + if (addr_len < sizeof(struct sockaddr_in) || + sin->sin_addr.s_addr == htonl(INADDR_ANY) || + sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) + return -EINVAL; + ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); + binding_addr = &v6addr; + port = sin->sin_port; +#if IS_ENABLED(CONFIG_IPV6) + } else if (uaddr->sa_family == AF_INET6) { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr; + int addr_type; + + if (addr_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + addr_type = ipv6_addr_type(&sin6->sin6_addr); + if (!(addr_type & IPV6_ADDR_UNICAST)) { + __be32 addr4; + if (!(addr_type & IPV6_ADDR_MAPPED)) + return -EINVAL; + + /* It is a mapped address. Need to do some sanity + * checks. 
+ */ + addr4 = sin6->sin6_addr.s6_addr32[3]; + if (addr4 == htonl(INADDR_ANY) || + addr4 == htonl(INADDR_BROADCAST) || + IN_MULTICAST(ntohl(addr4))) + return -EINVAL; + } + /* The scope ID must be specified for link local address. */ + if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (sin6->sin6_scope_id == 0) + return -EINVAL; + scope_id = sin6->sin6_scope_id; + } + binding_addr = &sin6->sin6_addr; + port = sin6->sin6_port; +#endif + } else { + return -EINVAL; + } lock_sock(sk); - if (addr_len != sizeof(struct sockaddr_in) || - sin->sin_family != AF_INET || - rs->rs_bound_addr || - sin->sin_addr.s_addr == htonl(INADDR_ANY)) { + /* RDS socket does not allow re-binding. */ + if (!ipv6_addr_any(&rs->rs_bound_addr)) { + ret = -EINVAL; + goto out; + } + /* Socket is connected. The binding address should have the same + * scope ID as the connected address, except the case when one is + * non-link local address (scope_id is 0). + */ + if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id && + rs->rs_bound_scope_id && + scope_id != rs->rs_bound_scope_id) { ret = -EINVAL; goto out; } - ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port); + ret = rds_add_bound(rs, binding_addr, &port, scope_id); if (ret) goto out; if (rs->rs_transport) { /* previously bound */ trans = rs->rs_transport; if (trans->laddr_check(sock_net(sock->sk), - sin->sin_addr.s_addr) != 0) { + binding_addr, scope_id) != 0) { ret = -ENOPROTOOPT; rds_remove_bound(rs); } else { @@ -172,13 +250,13 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } goto out; } - trans = rds_trans_get_preferred(sock_net(sock->sk), - sin->sin_addr.s_addr); + trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr, + scope_id); if (!trans) { ret = -EADDRNOTAVAIL; rds_remove_bound(rs); - pr_info_ratelimited("RDS: %s could not find a transport for %pI4, load rds_tcp or rds_rdma?\n", - __func__, &sin->sin_addr.s_addr); + pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load 
rds_tcp or rds_rdma?\n", + __func__, binding_addr); goto out; } diff --git a/net/rds/cong.c b/net/rds/cong.c index 63da9d2f142d..ccdff09a79c8 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Oracle. All rights reserved. + * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -101,7 +101,7 @@ static DEFINE_RWLOCK(rds_cong_monitor_lock); static DEFINE_SPINLOCK(rds_cong_lock); static struct rb_root rds_cong_tree = RB_ROOT; -static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, +static struct rds_cong_map *rds_cong_tree_walk(const struct in6_addr *addr, struct rds_cong_map *insert) { struct rb_node **p = &rds_cong_tree.rb_node; @@ -109,12 +109,15 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, struct rds_cong_map *map; while (*p) { + int diff; + parent = *p; map = rb_entry(parent, struct rds_cong_map, m_rb_node); - if (addr < map->m_addr) + diff = rds_addr_cmp(addr, &map->m_addr); + if (diff < 0) p = &(*p)->rb_left; - else if (addr > map->m_addr) + else if (diff > 0) p = &(*p)->rb_right; else return map; @@ -132,7 +135,7 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, * these bitmaps in the process getting pointers to them. The bitmaps are only * ever freed as the module is removed after all connections have been freed. 
*/ -static struct rds_cong_map *rds_cong_from_addr(__be32 addr) +static struct rds_cong_map *rds_cong_from_addr(const struct in6_addr *addr) { struct rds_cong_map *map; struct rds_cong_map *ret = NULL; @@ -144,7 +147,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr) if (!map) return NULL; - map->m_addr = addr; + map->m_addr = *addr; init_waitqueue_head(&map->m_waitq); INIT_LIST_HEAD(&map->m_conn_list); @@ -171,7 +174,7 @@ out: kfree(map); } - rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr)); + rdsdebug("map %p for addr %pI6c\n", ret, addr); return ret; } @@ -202,8 +205,8 @@ void rds_cong_remove_conn(struct rds_connection *conn) int rds_cong_get_maps(struct rds_connection *conn) { - conn->c_lcong = rds_cong_from_addr(conn->c_laddr); - conn->c_fcong = rds_cong_from_addr(conn->c_faddr); + conn->c_lcong = rds_cong_from_addr(&conn->c_laddr); + conn->c_fcong = rds_cong_from_addr(&conn->c_faddr); if (!(conn->c_lcong && conn->c_fcong)) return -ENOMEM; @@ -353,7 +356,7 @@ void rds_cong_remove_socket(struct rds_sock *rs) /* update congestion map for now-closed port */ spin_lock_irqsave(&rds_cong_lock, flags); - map = rds_cong_tree_walk(rs->rs_bound_addr, NULL); + map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL); spin_unlock_irqrestore(&rds_cong_lock, flags); if (map && rds_cong_test_bit(map, rs->rs_bound_port)) { diff --git a/net/rds/connection.c b/net/rds/connection.c index cfb05953b0e5..3bd2f4a5a30d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Oracle. All rights reserved. + * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -34,7 +34,9 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/export.h> -#include <net/inet_hashtables.h> +#include <net/ipv6.h> +#include <net/inet6_hashtables.h> +#include <net/addrconf.h> #include "rds.h" #include "loop.h" @@ -49,18 +51,25 @@ static unsigned long rds_conn_count; static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; static struct kmem_cache *rds_conn_slab; -static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) +static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr, + const struct in6_addr *faddr) { + static u32 rds6_hash_secret __read_mostly; static u32 rds_hash_secret __read_mostly; - unsigned long hash; + u32 lhash, fhash, hash; net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); + net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; +#if IS_ENABLED(CONFIG_IPV6) + fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret); +#else + fhash = (__force u32)faddr->s6_addr32[3]; +#endif + hash = __inet_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); - /* Pass NULL, don't need struct net for hash */ - hash = __inet_ehashfn(be32_to_cpu(laddr), 0, - be32_to_cpu(faddr), 0, - rds_hash_secret); return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; } @@ -72,20 +81,25 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) /* rcu read lock must be held or the connection spinlock */ static struct rds_connection *rds_conn_lookup(struct net *net, struct hlist_head *head, - __be32 laddr, __be32 faddr, - struct rds_transport *trans) + const struct in6_addr *laddr, + const struct in6_addr *faddr, + struct rds_transport *trans, + int dev_if) { struct rds_connection *conn, *ret = NULL; hlist_for_each_entry_rcu(conn, head, c_hash_node) { - if (conn->c_faddr == faddr && conn->c_laddr == laddr && - conn->c_trans == trans && net == rds_conn_net(conn)) { + if 
(ipv6_addr_equal(&conn->c_faddr, faddr) && + ipv6_addr_equal(&conn->c_laddr, laddr) && + conn->c_trans == trans && + net == rds_conn_net(conn) && + conn->c_dev_if == dev_if) { ret = conn; break; } } - rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret, - &laddr, &faddr); + rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret, + laddr, faddr); return ret; } @@ -99,8 +113,8 @@ static void rds_conn_path_reset(struct rds_conn_path *cp) { struct rds_connection *conn = cp->cp_conn; - rdsdebug("connection %pI4 to %pI4 reset\n", - &conn->c_laddr, &conn->c_faddr); + rdsdebug("connection %pI6c to %pI6c reset\n", + &conn->c_laddr, &conn->c_faddr); rds_stats_inc(s_conn_reset); rds_send_path_reset(cp); @@ -142,9 +156,12 @@ static void __rds_conn_path_init(struct rds_connection *conn, * are torn down as the module is removed, if ever. */ static struct rds_connection *__rds_conn_create(struct net *net, - __be32 laddr, __be32 faddr, - struct rds_transport *trans, gfp_t gfp, - int is_outgoing) + const struct in6_addr *laddr, + const struct in6_addr *faddr, + struct rds_transport *trans, + gfp_t gfp, + int is_outgoing, + int dev_if) { struct rds_connection *conn, *parent = NULL; struct hlist_head *head = rds_conn_bucket(laddr, faddr); @@ -154,9 +171,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rcu_read_lock(); - conn = rds_conn_lookup(net, head, laddr, faddr, trans); - if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && - laddr == faddr && !is_outgoing) { + conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); + if (conn && + conn->c_loopback && + conn->c_trans != &rds_loop_transport && + ipv6_addr_equal(laddr, faddr) && + !is_outgoing) { /* This is a looped back IB connection, and we're * called by the code handling the incoming connect. 
* We need a second connection object into which we @@ -181,8 +201,22 @@ static struct rds_connection *__rds_conn_create(struct net *net, } INIT_HLIST_NODE(&conn->c_hash_node); - conn->c_laddr = laddr; - conn->c_faddr = faddr; + conn->c_laddr = *laddr; + conn->c_isv6 = !ipv6_addr_v4mapped(laddr); + conn->c_faddr = *faddr; + conn->c_dev_if = dev_if; |
