/*
* Copyright (c) 2015 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/module.h>
#include <linux/openvswitch.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/sctp.h>
#include <net/ip.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
#endif
#include "datapath.h"
#include "conntrack.h"
#include "flow.h"
#include "flow_netlink.h"
/* A pair of length bounds.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * used to bound attribute payload sizes during validation - confirm at the
 * point of use.
 */
struct ovs_ct_len_tbl {
int maxlen; /* Upper length bound. */
int minlen; /* Lower length bound. */
};
/* Metadata mark for a masked write to the conntrack mark: a 32-bit value
 * together with a 32-bit mask.
 */
struct md_mark {
u32 value; /* Mark bits to write. */
u32 mask; /* Mask accompanying 'value' for the masked write. */
};
/* Metadata labels for a masked write to the conntrack labels: a labels value
 * together with a labels-sized mask.
 */
struct md_labels {
struct ovs_key_ct_labels value; /* Label bits to write. */
struct ovs_key_ct_labels mask; /* Mask accompanying 'value' for the masked write. */
};
/* NAT request flags.  Each enumerator occupies its own bit; the set is stored
 * in the 3-bit 'nat' member of struct ovs_conntrack_info.
 */
enum ovs_ct_nat {
OVS_CT_NAT = 1 << 0, /* NAT for committed connections only. */
OVS_CT_SRC_NAT = 1 << 1, /* Source NAT for NEW connections. */
OVS_CT_DST_NAT = 1 << 2, /* Destination NAT for NEW connections. */
};
/* Conntrack action context for execution.
 *
 * Holds the parameters of one ct action.  Within the visible part of the file
 * only 'zone' is read (by ovs_ct_update_key(), for packets that carry no
 * connection after the ct action); the remaining members are used elsewhere.
 */
struct ovs_conntrack_info {
struct nf_conntrack_helper *helper; /* Conntrack helper, if any - TODO confirm NULL means "none". */
struct nf_conntrack_zone zone; /* Conntrack zone of this action. */
struct nf_conn *ct; /* NOTE(review): presumably a template connection - confirm. */
u8 commit : 1; /* Single-bit flag; presumably "commit the connection" - confirm. */
u8 nat : 3; /* enum ovs_ct_nat */
u16 family; /* NOTE(review): presumably an NFPROTO_* value as returned by key_to_nfproto() - confirm. */
struct md_mark mark; /* Value/mask for the masked conntrack mark write. */
struct md_labels labels; /* Value/mask for the masked conntrack labels write. */
#ifdef CONFIG_NF_NAT_NEEDED
struct nf_nat_range range; /* Only present for SRC NAT and DST NAT. */
#endif
};
/* Forward declarations of file-local helpers whose definitions appear further
 * down in this file.
 */
static bool labels_nonzero(const struct ovs_key_ct_labels *labels);
static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
/* Derive the netfilter protocol family from the Ethernet type of 'key'.
 *
 * Returns NFPROTO_IPV4 for ETH_P_IP, NFPROTO_IPV6 for ETH_P_IPV6 and
 * NFPROTO_UNSPEC for every other Ethernet type.
 */
static u16 key_to_nfproto(const struct sw_flow_key *key)
{
	/* key->eth.type is kept in network byte order. */
	const u16 eth_type = ntohs(key->eth.type);

	if (eth_type == ETH_P_IP)
		return NFPROTO_IPV4;

	if (eth_type == ETH_P_IPV6)
		return NFPROTO_IPV6;

	return NFPROTO_UNSPEC;
}
/* Translate the conntrack 'ctinfo' of a packet into the OVS_CS_F_* bits used
 * by flow definitions.
 *
 * OVS_CS_F_TRACKED is always part of the result.  The two *_REPLY values
 * additionally set OVS_CS_F_REPLY_DIR, and exactly one of ESTABLISHED,
 * RELATED or NEW is added for the corresponding 'ctinfo' values.  Any other
 * 'ctinfo' yields OVS_CS_F_TRACKED alone.
 */
static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
{
	switch (ctinfo) {
	case IP_CT_ESTABLISHED:
		return OVS_CS_F_TRACKED | OVS_CS_F_ESTABLISHED;
	case IP_CT_ESTABLISHED_REPLY:
		return OVS_CS_F_TRACKED | OVS_CS_F_REPLY_DIR |
		       OVS_CS_F_ESTABLISHED;
	case IP_CT_RELATED:
		return OVS_CS_F_TRACKED | OVS_CS_F_RELATED;
	case IP_CT_RELATED_REPLY:
		return OVS_CS_F_TRACKED | OVS_CS_F_REPLY_DIR |
		       OVS_CS_F_RELATED;
	case IP_CT_NEW:
		return OVS_CS_F_TRACKED | OVS_CS_F_NEW;
	default:
		return OVS_CS_F_TRACKED;
	}
}
/* Return the mark of connection 'ct'.
 *
 * Yields 0 when 'ct' is NULL or when the kernel is built without
 * CONFIG_NF_CONNTRACK_MARK.
 */
static u32 ovs_ct_get_mark(const struct nf_conn *ct)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
	if (ct)
		return ct->mark;
#endif
	return 0;
}
/* Conntrack labels must provide at least 128 bits (16 bytes);
 * ovs_ct_get_labels() copies OVS_CT_LABELS_LEN bytes out of them, so refuse
 * to build if the maximum label size ever shrinks below that.
 */
#if NF_CT_LABELS_MAX_SIZE < 16
#error NF_CT_LABELS_MAX_SIZE must be at least 16 bytes
#endif

/* Copy the labels of connection 'ct' into 'labels'.
 *
 * 'labels' is zero-filled instead when 'ct' is NULL or when
 * nf_ct_labels_find() finds no labels for the connection.
 */
static void ovs_ct_get_labels(const struct nf_conn *ct,
			      struct ovs_key_ct_labels *labels)
{
	struct nf_conn_labels *found = NULL;

	if (ct)
		found = nf_ct_labels_find(ct);

	if (!found) {
		memset(labels, 0, OVS_CT_LABELS_LEN);
		return;
	}

	memcpy(labels, found->bits, OVS_CT_LABELS_LEN);
}
/* Store the protocol and transport-level fields of the original-direction
 * tuple 'orig' in 'key'.
 *
 * When the tuple's protocol equals 'icmp_proto', the ICMP type and code
 * (converted with htons()) take the place of the source and destination
 * transport fields.  For every other protocol the tuple's 'u.all' values are
 * copied unchanged.
 */
static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
					const struct nf_conntrack_tuple *orig,
					u8 icmp_proto)
{
	const u8 protonum = orig->dst.protonum;

	key->ct.orig_proto = protonum;

	if (protonum != icmp_proto) {
		key->ct.orig_tp.src = orig->src.u.all;
		key->ct.orig_tp.dst = orig->dst.u.all;
		return;
	}

	key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
	key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
}
/* Write the conntrack fields of 'key'.
 *
 * 'state' and the id of 'zone' are stored as given; mark and labels are read
 * from 'ct' (both become zero when 'ct' is NULL).
 *
 * The original-direction fields are filled in only when a connection exists
 * and its L3 protocol agrees with the Ethernet type of 'key': IPv4 for
 * ETH_P_IP, IPv6 for ETH_P_IPV6 unless sw_flow_key_is_nd() holds for the
 * key.  The tuple is taken from the master connection when 'ct' has one.  In
 * every other case 'ct.orig_proto' is cleared to mark the original-direction
 * fields as absent.
 */
static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
				const struct nf_conntrack_zone *zone,
				const struct nf_conn *ct)
{
	const struct nf_conntrack_tuple *tuple;
	const struct nf_conn *conn;

	key->ct.state = state;
	key->ct.zone = zone->id;
	key->ct.mark = ovs_ct_get_mark(ct);
	ovs_ct_get_labels(ct, &key->ct.labels);

	if (!ct)
		goto no_orig;

	/* Prefer the master connection's tuple when there is a master. */
	conn = ct->master ? ct->master : ct;
	tuple = &conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple;

	if (key->eth.type == htons(ETH_P_IP)) {
		/* The IP version has to match that of the connection. */
		if (nf_ct_l3num(conn) != NFPROTO_IPV4)
			goto no_orig;

		key->ipv4.ct_orig.src = tuple->src.u3.ip;
		key->ipv4.ct_orig.dst = tuple->dst.u3.ip;
		__ovs_ct_update_key_orig_tp(key, tuple, IPPROTO_ICMP);
		return;
	}

	if (key->eth.type == htons(ETH_P_IPV6)) {
		if (sw_flow_key_is_nd(key) ||
		    nf_ct_l3num(conn) != NFPROTO_IPV6)
			goto no_orig;

		key->ipv6.ct_orig.src = tuple->src.u3.in6;
		key->ipv6.ct_orig.dst = tuple->dst.u3.in6;
		__ovs_ct_update_key_orig_tp(key, tuple, NEXTHDR_ICMP);
		return;
	}

no_orig:
	/* A zero 'ct.orig_proto' signals that the original-direction key
	 * fields do not exist.
	 */
	key->ct.orig_proto = 0;
}
/* Refresh the conntrack fields of 'key' from the connection that nf_ct_get()
 * reports for 'skb'.
 *
 * 'post_ct' is true when OVS has previously sent the packet to conntrack via
 * the ct action.  If such a packet carries no connection, it is marked
 * TRACKED | INVALID and takes its zone from 'info' when 'info' is non-NULL.
 * Without a connection and without 'post_ct', the state is 0 and the default
 * zone is used.
 *
 * If 'keep_nat_flags' is true, the NAT flags already present in
 * 'key->ct.state' are retained; otherwise they are initialized from the
 * connection status.
 */
static void ovs_ct_update_key(const struct sk_buff *skb,
			      const struct ovs_conntrack_info *info,
			      struct sw_flow_key *key, bool post_ct,
			      bool keep_nat_flags)
{
	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	u8 state = 0;

	if (!ct) {
		if (post_ct) {
			state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
			if (info)
				zone = &info->zone;
		}
		goto update;
	}

	state = ovs_ct_get_state(ctinfo);

	/* Every unconfirmed entry counts as a NEW connection. */
	if (!nf_ct_is_confirmed(ct))
		state |= OVS_CS_F_NEW;

	/* The related flag stays set for the whole lifetime of a connection
	 * that has a master.
	 */
	if (ct->master)
		state |= OVS_CS_F_RELATED;

	if (keep_nat_flags) {
		state |= key->ct.state & OVS_CS_F_NAT_MASK;
	} else {
		state |= (ct->status & IPS_SRC_NAT) ? OVS_CS_F_SRC_NAT : 0;
		state |= (ct->status & IPS_DST_NAT) ? OVS_CS_F_DST_NAT : 0;
	}

	zone = nf_ct_zone(ct);

update:
	__ovs_ct_update_key(key, state, zone, ct);
}
/* This is called to initialize CT key fields possibly coming in from the local
* stack.
*/
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
{
ovs_ct_update_key(skb, NULL, key, false, false);
}
/* Expand to a brace-enclosed initializer list holding the four 32-bit words
 * (s6_addr32[0] through s6_addr32[3]) of ADDR.  ADDR is evaluated four
 * times, so pass an expression without side effects.
 */
#define IN6_ADDR_INITIALIZER(ADDR) \
{ (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
(ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
int ovs_ct_put_key(const struct sw_flow_key *swkey,
const struct sw_flow_key *output, struct sk_buff *skb)
{
if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct.s
|