// SPDX-License-Identifier: GPL-2.0
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/tcp.h>
#include <linux/hash.h>
#include <linux/tcp_metrics.h>
#include <linux/vmalloc.h>
#include <net/inet_connection_sock.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/dst.h>
#include <net/tcp.h>
#include <net/genetlink.h>

static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
						   const struct inetpeer_addr *daddr,
						   struct net *net, unsigned int hash);
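
/* Per-destination TCP Fast Open state that is cached between
 * connections: the MSS the peer announced, a short history of
 * Fast Open SYN losses used to back off when it keeps failing,
 * and the cookie the server handed out.
 */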
struct tcp_fastopen_metrics {
	u16	mss;
	u16	syn_loss:10,		/* Recurring Fast Open SYN losses */
		try_exp:2;		/* Request w/ exp. option (once) */
	unsigned long	last_syn_loss;	/* Last Fast Open SYN loss */
	struct	tcp_fastopen_cookie	cookie;
};

/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
* Kernel only stores RTT and RTTVAR in usec resolution
*/
#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
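
/* One cache entry per (source, destination) address pair and network
 * namespace.  Entries hang off a hash bucket in an RCU-protected
 * chain and are freed via rcu_head.  tcpm_lock is a bitmap of
 * metrics pinned by the route that must not be overwritten from a
 * live connection.
 */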
struct tcp_metrics_block {
	struct tcp_metrics_block __rcu	*tcpm_next;
	possible_net_t			tcpm_net;
	struct inetpeer_addr		tcpm_saddr;
	struct inetpeer_addr		tcpm_daddr;
	unsigned long			tcpm_stamp;
	u32				tcpm_lock;
	u32				tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
	struct tcp_fastopen_metrics	tcpm_fastopen;

	struct rcu_head			rcu_head;
};

static inline struct net *tm_net(struct tcp_metrics_block *tm)
{
	return read_pnet(&tm->tcpm_net);
}
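
/* A metric is "locked" when the route pins it, e.g. with iproute2
 * (addresses here purely illustrative):
 *
 *	ip route add 10.1.1.0/24 via 10.0.0.1 rtt lock 300
 *
 * tcpm_suck_dst() translates the locked RTAX_* metrics into tcpm_lock
 * bits so later updates from live connections can skip them.
 */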
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
			      enum tcp_metric_index idx)
{
	return tm->tcpm_lock & (1 << idx);
}

static u32 tcp_metric_get(struct tcp_metrics_block *tm,
			  enum tcp_metric_index idx)
{
	return tm->tcpm_vals[idx];
}

static void tcp_metric_set(struct tcp_metrics_block *tm,
			   enum tcp_metric_index idx,
			   u32 val)
{
	tm->tcpm_vals[idx] = val;
}

static bool addr_same(const struct inetpeer_addr *a,
		      const struct inetpeer_addr *b)
{
	return inetpeer_addr_cmp(a, b) == 0;
}
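
/* The cache itself: a power-of-two array of RCU-protected chains.
 * tcp_metrics_hash_log holds log2 of the table size, and
 * tcp_metrics_lock serializes all writers.
 */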
struct tcpm_hash_bucket {
	struct tcp_metrics_block __rcu	*chain;
};

static struct tcpm_hash_bucket	*tcp_metrics_hash __read_mostly;
static unsigned int		tcp_metrics_hash_log __read_mostly;

static DEFINE_SPINLOCK(tcp_metrics_lock);
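
/* (Re)initialize a cache entry from the route's metrics: record which
 * RTAX_* values the route locks, then copy the values in, converting
 * RTT and RTTVAR from the dst's msec resolution to the usec resolution
 * stored here.  Fast Open state is wiped only when fastopen_clear is
 * set, i.e. when the entry is created or recycled, not on refresh.
 */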
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
			  const struct dst_entry *dst,
			  bool fastopen_clear)
{
	u32 msval;
	u32 val;

	tm->tcpm_stamp = jiffies;

	val = 0;
	if (dst_metric_locked(dst, RTAX_RTT))
		val |= 1 << TCP_METRIC_RTT;
	if (dst_metric_locked(dst, RTAX_RTTVAR))
		val |= 1 << TCP_METRIC_RTTVAR;
	if (dst_metric_locked(dst, RTAX_SSTHRESH))
		val |= 1 << TCP_METRIC_SSTHRESH;
	if (dst_metric_locked(dst, RTAX_CWND))
		val |= 1 << TCP_METRIC_CWND;
	if (dst_metric_locked(dst, RTAX_REORDERING))
		val |= 1 << TCP_METRIC_REORDERING;
	tm->tcpm_lock = val;

	msval = dst_metric_raw(dst, RTAX_RTT);
	tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;

	msval = dst_metric_raw(dst, RTAX_RTTVAR);
	tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
	if (fastopen_clear) {
		tm->tcpm_fastopen.mss = 0;
		tm->tcpm_fastopen.syn_loss = 0;
		tm->tcpm_fastopen.try_exp = 0;
		tm->tcpm_fastopen.cookie.exp = false;
		tm->tcpm_fastopen.cookie.len = 0;
	}
}

#define TCP_METRICS_TIMEOUT		(60 * 60 * HZ)
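
/* An entry whose stamp has not been refreshed for an hour is
 * considered stale and is re-seeded from the route before use.
 */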
static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
{
	if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
		tcpm_suck_dst(tm, dst, false);
}
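
/* When a lookup misses on a chain more than TCP_METRICS_RECLAIM_DEPTH
 * entries deep, it returns the TCP_METRICS_RECLAIM_PTR sentinel and
 * tcpm_new() recycles the oldest entry of that chain rather than
 * letting it grow.
 */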
#define TCP_METRICS_RECLAIM_DEPTH	5
#define TCP_METRICS_RECLAIM_PTR		(struct tcp_metrics_block *) 0x1UL

#define deref_locked(p)	\
	rcu_dereference_protected(p, lockdep_is_held(&tcp_metrics_lock))
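
/* Create a cache entry for saddr/daddr on a lookup miss: allocate a
 * fresh block, or recycle the oldest block of an over-deep chain when
 * the lookup returned TCP_METRICS_RECLAIM_PTR.  Runs under
 * tcp_metrics_lock; only the final rcu_assign_pointer() makes the new
 * entry visible to lockless readers.
 */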
static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
					  struct inetpeer_addr *saddr,
					  struct inetpeer_addr *daddr,
					  unsigned int hash)
{
	struct tcp_metrics_block *tm;
	struct net *net;
	bool reclaim = false;

	spin_lock_bh(&tcp_metrics_lock);
	net = dev_net(dst->dev);

	/* While waiting for the spin-lock the cache might have been populated
	 * with this entry and so we have to check again.
	 */
	tm = __tcp_get_metrics(saddr, daddr, net, hash);
	if (tm == TCP_METRICS_RECLAIM_PTR) {
		reclaim = true;
		tm = NULL;
	}
	if (tm) {
		tcpm_check_stamp(tm, dst);
		goto out_unlock;
	}

	if (unlikely(reclaim)) {
		struct tcp_metrics_block *oldest;

		oldest = deref_locked(tcp_metrics_hash[hash].chain);
		for (tm = deref_locked(oldest->tcpm_next); tm;
		     tm = deref_locked(tm->tcpm_next)) {
			if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
				oldest = tm;
		}
		tm = oldest;
	} else {
		tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
		if (!tm)
			goto out_unlock;
	}
	write_pnet(&tm->tcpm_net, net);
	tm->tcpm_saddr = *saddr;
	tm->tcpm_daddr = *daddr;

	tcpm_suck_dst(tm, dst, true);

	if (likely(!reclaim)) {
		tm->tcpm_next = tcp_metrics_hash[hash].chain;
		rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm);
	}

out_unlock:
	spin_unlock_bh(&tcp_metrics_lock);
	return tm;
}