// SPDX-License-Identifier: GPL-2.0
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/tcp.h>
#include <linux/hash.h>
#include <linux/tcp_metrics.h>
#include <linux/vmalloc.h>
#include <net/inet_connection_sock.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/genetlink.h>
static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
const struct inetpeer_addr *daddr,
struct net *net, unsigned int hash);
struct tcp_fastopen_metrics {
u16 mss;
u16 syn_loss:10, /* Recurring Fast Open SYN losses */
try_exp:2; /* Request w/ exp. option (once) */
unsigned long last_syn_loss; /* Last Fast Open SYN loss */
struct tcp_fastopen_cookie cookie;
};
/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
* Kernel only stores RTT and RTTVAR in usec resolution
*/
#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
u32 tcpm_lock;
u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
struct rcu_head rcu_head;
};
static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
/* Paired with the WRITE_ONCE() in tcpm_new() */
return READ_ONCE(tm->tcpm_net);
}
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
/* Paired with WRITE_ONCE() in tcpm_suck_dst() */
return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}
static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
/* Paired with WRITE_ONCE() in tcp_metric_set() */
return READ_ONCE(tm->tcpm_vals[idx]);
}
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
{
/* Paired with READ_ONCE() in tcp_metric_get() */
WRITE_ONCE(tm->tcpm_vals[idx], val);
}
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}
struct tcpm_hash_bucket {
struct tcp_metrics_block __rcu *chain;
};
static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly;
static DEFINE_SPINLOCK(tcp_metrics_lock);
static DEFINE_SEQLOCK(fastopen_seqlock);
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst,
bool fastopen_clear)
{
u32 msval;
u32 val;
WRITE_ONCE(tm->tcpm_stamp, jiffies);
val = 0;
if (dst_metric_locked(dst, RTAX_RTT))
val |= 1 << TCP_METRIC_RTT;
if (dst_metric_locked(dst, RTAX_RTTVAR))
val |= 1 << TCP_METRIC_RTTVAR;
if (dst_metric_locked(dst, RTAX_SSTHRESH))
val |= 1 << TCP_METRIC_SSTHRESH;
if (dst_metric_locked(dst, RTAX_CWND))
val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING;
/* Paired with READ_ONCE() in tcp_metric_locked() */
WRITE_ONCE(tm->tcpm_lock, val);
msval = dst_metric_raw(dst, RTAX_RTT);
tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
msval = dst_metric_raw(dst, RTAX_RTTVAR);
tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
dst_metric_raw(dst, RTAX_SSTHRESH));
tcp_metric_set(tm, TCP_METRIC_CWND,
dst_metric_raw(dst, RTAX_CWND));
tcp_metric_set(tm, TCP_METRIC_REORDERING,
dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) {
write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
write_sequnlock(&fastopen_seqlock);
}
}
#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
static void tcpm_check_stamp(struct tcp_metrics_block *tm,
const struct dst_entry *dst)
{
unsigned long limit;
if (!tm)
return;
limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false);
}
#define TCP_METRICS_RECLAIM_DEPTH 5
#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL
#define deref_locked(p) \
rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock))
static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
struct inetpeer_addr *saddr,
struct inetpeer_addr *daddr,
unsigned int hash)
{
struct tcp_metrics_block *tm;
struct net *net;
bool reclaim = false;
spin_lock_bh(&tcp_metrics_lock);