summaryrefslogtreecommitdiff
path: root/include/net/inetpeer.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-24 10:01:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-24 10:01:50 -0700
commit3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch)
tree3df72faaacd494d5ac8c9668df4f529b1b5e4457 /include/net/inetpeer.h
parente017507f37d5cb8b541df165a824958bc333bec3 (diff)
parent320f5ea0cedc08ef65d67e056bcb9d181386ef2c (diff)
downloadlinux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.tar.gz
linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.tar.bz2
linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller: 1) Remove the ipv4 routing cache. Now lookups go directly into the FIB trie and use prebuilt routes cached there. No more garbage collection, no more rDOS attacks on the routing cache. Instead we now get predictable and consistent performance, no matter what the pattern of traffic we service. This has been almost 2 years in the making. Special thanks to Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who have helped along the way. I'm sure that with a change of this magnitude there will be some kind of fallout, but such things ought the be simple to fix at this point. Luckily I'm not European so I'll be around all of August to fix things :-) The major stages of this work here are each fronted by a forced merge commit whose commit message contains a top-level description of the motivations and implementation issues. 2) Pre-demux of established ipv4 TCP sockets, saves a route demux on input. 3) TCP SYN/ACK performance tweaks from Eric Dumazet. 4) Add namespace support for netfilter L4 conntrack helpers, from Gao Feng. 5) Add config mechanism for Energy Efficient Ethernet to ethtool, from Yuval Mintz. 6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet. 7) Support for connection tracker helpers in userspace, from Pablo Neira Ayuso. 8) Allow userspace driven TX load balancing functions in TEAM driver, from Jiri Pirko. 9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with embedded gotos. 10) TCP Small Queues, essentially minimize the amount of TCP data queued up in the packet scheduler layer. Whereas the existing BQL (Byte Queue Limits) limits the pkt_sched --> netdevice queuing levels, this controls the TCP --> pkt_sched queueing levels. From Eric Dumazet. 11) Reduce the number of get_page/put_page ops done on SKB fragments, from Alexander Duyck. 12) Implement protection against blind resets in TCP (RFC 5961), from Eric Dumazet. 13) Support the client side of TCP Fast Open, basically the ability to send data in the SYN exchange, from Yuchung Cheng. Basically, the sender queues up data with a sendmsg() call using MSG_FASTOPEN, then they do the connect() which emits the queued up fastopen data. 14) Avoid all the problems we get into in TCP when timers or PMTU events hit a locked socket. The TCP Small Queues changes added a tcp_release_cb() that allows us to queue work up to the release_sock() caller, and that's what we use here too. From Eric Dumazet. 15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits) genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP r8169: revert "add byte queue limit support". ipv4: Change rt->rt_iif encoding. net: Make skb->skb_iif always track skb->dev ipv4: Prepare for change of rt->rt_iif encoding. ipv4: Remove all RTCF_DIRECTSRC handliing. ipv4: Really ignore ICMP address requests/replies. decnet: Don't set RTCF_DIRECTSRC. net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse. ipv4: Remove redundant assignment rds: set correct msg_namelen openvswitch: potential NULL deref in sample() tcp: dont drop MTU reduction indications bnx2x: Add new 57840 device IDs tcp: avoid oops in tcp_metrics and reset tcpm_stamp niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value niu: Fix to check for dma mapping errors. net: Fix references to out-of-scope variables in put_cmsg_compat() net: ethernet: davinci_emac: add pm_runtime support net: ethernet: davinci_emac: Remove unnecessary #include ...
Diffstat (limited to 'include/net/inetpeer.h')
-rw-r--r--include/net/inetpeer.h90
1 files changed, 77 insertions, 13 deletions
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 2040bff945d4..53f464d7cddc 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -36,25 +36,19 @@ struct inet_peer {
u32 metrics[RTAX_MAX];
u32 rate_tokens; /* rate limiting for ICMP */
unsigned long rate_last;
- unsigned long pmtu_expires;
- u32 pmtu_orig;
- u32 pmtu_learned;
- struct inetpeer_addr_base redirect_learned;
union {
struct list_head gc_list;
struct rcu_head gc_rcu;
};
/*
* Once inet_peer is queued for deletion (refcnt == -1), following fields
- * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+ * are not available: rid, ip_id_count
* We can share memory with rcu_head to help keep inet_peer small.
*/
union {
struct {
atomic_t rid; /* Frag reception counter */
atomic_t ip_id_count; /* IP ID for the next packet */
- __u32 tcp_ts;
- __u32 tcp_ts_stamp;
};
struct rcu_head rcu;
struct inet_peer *gc_next;
@@ -65,6 +59,69 @@ struct inet_peer {
atomic_t refcnt;
};
+struct inet_peer_base {
+ struct inet_peer __rcu *root;
+ seqlock_t lock;
+ u32 flush_seq;
+ int total;
+};
+
+#define INETPEER_BASE_BIT 0x1UL
+
+static inline struct inet_peer *inetpeer_ptr(unsigned long val)
+{
+ BUG_ON(val & INETPEER_BASE_BIT);
+ return (struct inet_peer *) val;
+}
+
+static inline struct inet_peer_base *inetpeer_base_ptr(unsigned long val)
+{
+ if (!(val & INETPEER_BASE_BIT))
+ return NULL;
+ val &= ~INETPEER_BASE_BIT;
+ return (struct inet_peer_base *) val;
+}
+
+static inline bool inetpeer_ptr_is_peer(unsigned long val)
+{
+ return !(val & INETPEER_BASE_BIT);
+}
+
+static inline void __inetpeer_ptr_set_peer(unsigned long *val, struct inet_peer *peer)
+{
+ /* This implicitly clears INETPEER_BASE_BIT */
+ *val = (unsigned long) peer;
+}
+
+static inline bool inetpeer_ptr_set_peer(unsigned long *ptr, struct inet_peer *peer)
+{
+ unsigned long val = (unsigned long) peer;
+ unsigned long orig = *ptr;
+
+ if (!(orig & INETPEER_BASE_BIT) ||
+ cmpxchg(ptr, orig, val) != orig)
+ return false;
+ return true;
+}
+
+static inline void inetpeer_init_ptr(unsigned long *ptr, struct inet_peer_base *base)
+{
+ *ptr = (unsigned long) base | INETPEER_BASE_BIT;
+}
+
+static inline void inetpeer_transfer_peer(unsigned long *to, unsigned long *from)
+{
+ unsigned long val = *from;
+
+ *to = val;
+ if (inetpeer_ptr_is_peer(val)) {
+ struct inet_peer *peer = inetpeer_ptr(val);
+ atomic_inc(&peer->refcnt);
+ }
+}
+
+extern void inet_peer_base_init(struct inet_peer_base *);
+
void inet_initpeers(void) __init;
#define INETPEER_METRICS_NEW (~(u32) 0)
@@ -75,31 +132,38 @@ static inline bool inet_metrics_new(const struct inet_peer *p)
}
/* can be called with or without local BH being disabled */
-struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create);
+struct inet_peer *inet_getpeer(struct inet_peer_base *base,
+ const struct inetpeer_addr *daddr,
+ int create);
-static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
+static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
+ __be32 v4daddr,
+ int create)
{
struct inetpeer_addr daddr;
daddr.addr.a4 = v4daddr;
daddr.family = AF_INET;
- return inet_getpeer(&daddr, create);
+ return inet_getpeer(base, &daddr, create);
}
-static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create)
+static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
+ const struct in6_addr *v6daddr,
+ int create)
{
struct inetpeer_addr daddr;
*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
daddr.family = AF_INET6;
- return inet_getpeer(&daddr, create);
+ return inet_getpeer(base, &daddr, create);
}
/* can be called from BH context or outside */
extern void inet_putpeer(struct inet_peer *p);
extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
-extern void inetpeer_invalidate_tree(int family);
+extern void inetpeer_invalidate_tree(struct inet_peer_base *);
+extern void inetpeer_invalidate_family(int family);
/*
* temporary check to make sure we dont access rid, ip_id_count, tcp_ts,