// SPDX-License-Identifier: GPL-2.0-or-later
/*
* NET3: Implementation of the ICMP protocol layer.
*
* Alan Cox, <alan@lxorguk.ukuu.org.uk>
*
* Some of the function names and the icmp unreach table for this
* module were derived from [icmp.c 1.0.11 06/02/93] by
* Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting.
* Other than that this module is a complete rewrite.
*
* Fixes:
* Clemens Fruhwirth : introduce global icmp rate limiting
* with icmp type masking ability instead
* of broken per type icmp timeouts.
* Mike Shaver : RFC1122 checks.
* Alan Cox : Multicast ping reply as self.
* Alan Cox : Fix atomicity lockup in ip_build_xmit
* call.
* Alan Cox : Added 216,128 byte paths to the MTU
* code.
* Martin Mares : RFC1812 checks.
* Martin Mares : Can be configured to follow redirects
* if acting as a router _without_ a
* routing protocol (RFC 1812).
* Martin Mares : Echo requests may be configured to
* be ignored (RFC 1812).
* Martin Mares : Limitation of ICMP error message
* transmit rate (RFC 1812).
* Martin Mares : TOS and Precedence set correctly
* (RFC 1812).
* Martin Mares : Now copying as much data from the
* original packet as we can without
* exceeding 576 bytes (RFC 1812).
* Willy Konynenberg : Transparent proxying support.
* Keith Owens : RFC1191 correction for 4.2BSD based
* path MTU bug.
* Thomas Quinot : ICMP Dest Unreach codes up to 15 are
* valid (RFC 1812).
* Andi Kleen : Check all packet lengths properly
* and moved all kfree_skb() up to
* icmp_rcv.
* Andi Kleen : Move the rate limit bookkeeping
* into the dest entry and use a token
* bucket filter (thanks to ANK). Make
* the rates sysctl configurable.
* Yu Tianli : Fixed two ugly bugs in icmp_send
* - IP option length was accounted wrongly
* - ICMP header length was not accounted
* at all.
* Tristan Greaves : Added sysctl option to ignore bogus
* broadcast responses from broken routers.
*
* To Fix:
*
* - Should use skb_pull() instead of all the manual checking.
* This would also greatly simplify some upper layer error handlers. --AK
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/math64.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/netfilter_ipv4.h>
#include <linux/slab.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/protocol.h>
#include <net/icmp.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/ping.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/init.h>
#include <linux/uaccess.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
#include <net/inet_dscp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/icmp.h>
/*
 *	Build xmit assembly blocks
 *
 *	Groups an skb pointer, offset/length bookkeeping, an ICMP header
 *	followed by three big-endian 32-bit words, and an IP options buffer.
 *
 *	NOTE(review): the per-field roles below are inferred from the field
 *	names only; confirm them against the code that fills this struct.
 */
struct icmp_bxm {
	/* presumably the packet the ICMP payload is copied from -- TODO confirm */
	struct sk_buff *skb;
	/* presumably the byte offset into skb where that payload starts */
	int offset;
	/* presumably the number of payload bytes */
	int data_len;

	struct {
		/* ICMP header */
		struct icmphdr icmph;
		/* three big-endian words laid out right after the header;
		 * presumably timestamp message fields -- TODO confirm
		 */
		__be32 times[3];
	} data;
	/* presumably how many bytes of 'data' are in use -- TODO confirm */
	int head_len;
	/* IP options storage; presumably the options used for the reply */
	struct ip_options_data replyopts;
};
/*
 * errno translation table for destination-unreachable error messages.
 * Each row carries the errno to report and whether the error is fatal;
 * rows appear in the order given by the code tagged on each one.
 *
 * RFC 1122: 3.2.2.1 states that NET_UNREACH, HOST_UNREACH and SR_FAILED
 * MUST be considered 'transient errs', hence .fatal = 0 on those rows.
 */
const struct icmp_err icmp_err_convert[] = {
	{ .errno = ENETUNREACH,  .fatal = 0, },	/* ICMP_NET_UNREACH */
	{ .errno = EHOSTUNREACH, .fatal = 0, },	/* ICMP_HOST_UNREACH */
	{ .errno = ENOPROTOOPT,  .fatal = 1, },	/* ICMP_PROT_UNREACH */
	{ .errno = ECONNREFUSED, .fatal = 1, },	/* ICMP_PORT_UNREACH */
	{ .errno = EMSGSIZE,     .fatal = 0, },	/* ICMP_FRAG_NEEDED */
	{ .errno = EOPNOTSUPP,   .fatal = 0, },	/* ICMP_SR_FAILED */
	{ .errno = ENETUNREACH,  .fatal = 1, },	/* ICMP_NET_UNKNOWN */
	{ .errno = EHOSTDOWN,    .fatal = 1, },	/* ICMP_HOST_UNKNOWN */
	{ .errno = ENONET,       .fatal = 1, },	/* ICMP_HOST_ISOLATED */
	{ .errno = ENETUNREACH,  .fatal = 1, },	/* ICMP_NET_ANO */
	{ .errno = EHOSTUNREACH, .fatal = 1, },	/* ICMP_HOST_ANO */
	{ .errno = ENETUNREACH,  .fatal = 0, },	/* ICMP_NET_UNR_TOS */
	{ .errno = EHOSTUNREACH, .fatal = 0, },	/* ICMP_HOST_UNR_TOS */
	{ .errno = EHOSTUNREACH, .fatal = 1, },	/* ICMP_PKT_FILTERED */
	{ .errno = EHOSTUNREACH, .fatal = 1, },	/* ICMP_PREC_VIOLATION */
	{ .errno = EHOSTUNREACH, .fatal = 1, },	/* ICMP_PREC_CUTOFF */
};
EXPORT_SYMBOL(icmp_err_convert);
/*
 *	ICMP control array. This specifies what to do with each ICMP.
 *
 *	Each entry pairs a handler with a flag telling whether the ICMP is
 *	classed as an error message. Presumably one entry exists per ICMP
 *	type (see the icmp_pointers[] declaration) -- confirm at the
 *	table's definition.
 */
struct icmp_control {
	/* Handler called with the skb; reports an skb drop reason */
	enum skb_drop_reason (*handler)(struct sk_buff *skb);
	short error; /* This ICMP is classed as an error message */
};
/* Table of icmp_control entries with NR_ICMP_TYPES+1 slots. Only declared
 * here; the initialized definition is expected further down the file
 * (not visible from this point -- confirm).
 */
static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];

/* Per-CPU socket pointer; icmp_xmit_lock() reads the current CPU's entry. */
static DEFINE_PER_CPU(struct sock *, ipv4_icmp_sk);
/*
 * Take this CPU's ICMP socket for a transmit on behalf of @net.
 *
 * Called with BH disabled. The socket spinlock is only tried, never
 * waited on. On success the socket's netns is set to @net and the locked
 * socket is returned; if the lock is already held, NULL is returned.
 */
static inline struct sock *icmp_xmit_lock(struct net *net)
{
	struct sock *sk = this_cpu_read(ipv4_icmp_sk);

	if (likely(spin_trylock(&sk->sk_lock.slock))) {
		sock_net_set(sk, net);
		return sk;
	}

	/* Lock is busy: this can happen if the output path signals a
	 * dst_link_failure() for an outgoing ICMP packet.
	 */
	return NULL;
}
/*
 * Counterpart of icmp_xmit_lock(): release the socket spinlock.
 *
 * The socket's netns is pointed back at init_net first, so that change
 * is made while the spinlock is still held; only then is the lock dropped.
 */
static inline void icmp_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, &init_net);
	spin_unlock(&sk->sk_lock.slock);
}
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
* @net: network namespace
*
* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
* Works in tandem with icmp_global_consume().
*/
bool icmp_global_allow(struct net *net)
{
u32 delta, now, oldstamp;
int incr, new, old;
/* Note: many cpus could find this condition true.
* Then later icmp_global_consume() could consume more credits,
* this is an acceptable race.
*/
if (atomic_read(&net->ipv4.icmp_global_credit) > 0)
return true;
now = jiffies;
oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
delta = min_t(u32, now - oldstamp, HZ);
if (delta < HZ / 50)
return false;
incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
if (!incr)
return false;
if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
old = atomic_read(&net->ipv4.icmp_global_credit);
do {
new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
} while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
}
return true;
}
EXPORT_SYMBOL(icmp_global_allow);
/*
 * Charge the global ICMP credit counter of @net.
 *
 * The amount taken is random, drawn with get_random_u32_below(3); when the
 * draw is zero the counter is left untouched.
 */
void icmp_global_consume(struct net *net)
{
	int cost = get_random_u32_below(3);

	if (!cost)
		return;

	/* Note: this might make icmp_global.credit negative. */
	atomic_sub(cost, &net->ipv4.icmp_global_credit);
}
EXPORT_SYMBOL(icmp_global_consume);
/*
 * Tell whether an ICMP message of @type / @code is exempt from rate
 * limiting in @net.
 *
 * Returns true (exempt) when @type is above NR_ICMP_TYPES, when the message
 * is ICMP_DEST_UNREACH / ICMP_FRAG_NEEDED, or when the bit for @type is
 * clear in sysctl_icmp_ratemask; returns false otherwise.
 */
static bool icmpv4_mask_allow(struct net *net, int type, int code)
{
	/* Out-of-range types are let through, and PMTU discovery
	 * must not be limited.
	 */
	if (type > NR_ICMP_TYPES ||
	    (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
		return true;

	/* Limit only if the icmp type is enabled in ratemask. */
	return !((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask));
}
static bool icmpv4_global_allow(struct net *net, int type, int code,
bool *apply_ratelimit)
{
if (icmpv4_mask_allow(net, type, code))
return true;
if (icmp_global_allow(net)) {
*apply_ratelimit = true;
return true;
}
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return
|