From: Steven Barth <steven@midlink.org>
Subject: Add support for MAP-E FMRs (mesh mode)

MAP-E FMRs (draft-ietf-softwire-map-10) are rules for IPv4-communication
between MAP CEs (mesh mode) without the need to forward such data to a
border relay. This is similar to how 6rd works but for IPv4 over IPv6.

Signed-off-by: Steven Barth <cyrus@openwrt.org>
---
 include/net/ip6_tunnel.h       |  13 ++
 include/uapi/linux/if_tunnel.h |  13 ++
 net/ipv6/ip6_tunnel.c          | 276 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 291 insertions(+), 11 deletions(-)

--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -18,6 +18,18 @@
 /* determine capability on a per-packet basis */
 #define IP6_TNL_F_CAP_PER_PACKET 0x40000
 
+/* IPv6 tunnel FMR */
+struct __ip6_tnl_fmr {
+	struct __ip6_tnl_fmr *next; /* next fmr in list */
+	struct in6_addr ip6_prefix;
+	struct in_addr ip4_prefix;
+
+	__u8 ip6_prefix_len;
+	__u8 ip4_prefix_len;
+	__u8 ea_len;
+	__u8 offset;
+};
+
 struct __ip6_tnl_parm {
 	char name[IFNAMSIZ];	/* name of tunnel device */
 	int link;		/* ifindex of underlying L2 interface */
@@ -29,6 +41,7 @@ struct __ip6_tnl_parm {
 	__u32 flags;		/* tunnel flags */
 	struct in6_addr laddr;	/* local tunnel end-point address */
 	struct in6_addr raddr;	/* remote tunnel end-point address */
+	struct __ip6_tnl_fmr *fmrs;	/* FMRs */
 
 	__be16			i_flags;
 	__be16			o_flags;
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -77,10 +77,23 @@ enum {
 	IFLA_IPTUN_ENCAP_DPORT,
 	IFLA_IPTUN_COLLECT_METADATA,
 	IFLA_IPTUN_FWMARK,
+	IFLA_IPTUN_FMRS,
 	__IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX	(__IFLA_IPTUN_MAX - 1)
 
+enum {
+	IFLA_IPTUN_FMR_UNSPEC,
+	IFLA_IPTUN_FMR_IP6_PREFIX,
+	IFLA_IPTUN_FMR_IP4_PREFIX,
+	IFLA_IPTUN_FMR_IP6_PREFIX_LEN,
+	IFLA_IPTUN_FMR_IP4_PREFIX_LEN,
+	IFLA_IPTUN_FMR_EA_LEN,
+	IFLA_IPTUN_FMR_OFFSET,
+	__IFLA_IPTUN_FMR_MAX,
+};
+#define IFLA_IPTUN_FMR_MAX (__IFLA_IPTUN_FMR_MAX - 1)
+
 enum tunnel_encap_types {
 	TUNNEL_ENCAP_NONE,
 	TUNNEL_ENCAP_FOU,
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -11,6 +11,9 @@
  *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  *
  *      RFC 2473
+ *
+ *      Changes:
+ *      Steven Barth <cyrus@openwrt.org>:           MAP-E FMR support
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -67,9 +70,9 @@ static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+static u32 HASH(const struct in6_addr *addr)
 {
-	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+	u32 hash = ipv6_addr_hash(addr);
 
 	return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
 }
@@ -114,17 +117,33 @@ static struct ip6_tnl *
 ip6_tnl_lookup(struct net *net, int link,
 	       const struct in6_addr *remote, const struct in6_addr *local)
 {
-	unsigned int hash = HASH(remote, local);
+	unsigned int hash = HASH(local);
 	struct ip6_tnl *t, *cand = NULL;
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 	struct in6_addr any;
 
 	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
-		    !ipv6_addr_equal(remote, &t->parms.raddr) ||
 		    !(t->dev->flags & IFF_UP))
 			continue;
 
+		if (!ipv6_addr_equal(remote, &t->parms.raddr)) {
+			struct __ip6_tnl_fmr *fmr;
+			bool found = false;
+
+			for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
+				if (!ipv6_prefix_equal(remote, &fmr->ip6_prefix,
+						       fmr->ip6_prefix_len))
+					continue;
+
+				found = true;
+				break;
+			}
+
+			if (!found)
+				continue;
+		}
+
 		if (link == t->parms.link)
 			return t;
 		else
@@ -132,7 +151,7 @@ ip6_tnl_lookup(struct net *net, int link
 	}
 
 	memset(&any, 0, sizeof(any));
-	hash = HASH(&any, local);
+	hash = HASH(local);
 	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
 		    !ipv6_addr_any(&t->parms.raddr) ||
@@ -145,7 +164,7 @@ ip6_tnl_lookup(struct net *net, int link
 			cand = t;
 	}
 
-	hash = HASH(remote, &any);
+	hash = HASH(&any);
 	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 		if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
 		    !ipv6_addr_any(&t->parms.laddr) ||
@@ -194,7 +213,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n,
 
 	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 		prio = 1;
-		h = HASH(remote, local);
+		h = HASH(local);
 	}
 	return &ip6n->tnls[prio][h];
 }
@@ -378,6 +397,12 @@ ip6_tnl_dev_uninit(struct net_device *de
 	struct net *net = t->net;
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
+	while (t->parms.fmrs) {
+		struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
+		kfree(t->parms.fmrs);
+		t->parms.fmrs = next;
+	}
+
 	if (dev == ip6n->fb_tnl_dev)
 		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
 	else
@@ -790,6 +815,107 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 
+/**
+ * ip4ip6_fmr_calc - calculate target / source IPv6-address based on FMR
+ *   @dest: destination IPv6 address buffer
+ *   @skb: received socket buffer
+ *   @fmr: MAP FMR
+ *   @xmit: Calculate for xmit or rcv
+ **/
+static void ip4ip6_fmr_calc(struct in6_addr *dest,
+		const struct iphdr *iph, const uint8_t *end,
+		const struct __ip6_tnl_fmr *fmr, bool xmit)
+{
+	int psidlen = fmr->ea_len - (32 - fmr->ip4_prefix_len);
+	u8 *portp = NULL;
+	bool use_dest_addr;
+	const struct iphdr *dsth = iph;
+
+	if ((u8*)dsth >= end)
+		return;
+
+	/* find significant IP header */
+	if (iph->protocol == IPPROTO_ICMP) {
+		struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
+		if (ih && ((u8*)&ih[1]) <= end && (
+			ih->type == ICMP_DEST_UNREACH ||
+			ih->type == ICMP_SOURCE_QUENCH ||
+			ih->type == ICMP_TIME_EXCEEDED ||
+			ih->type == ICMP_PARAMETERPROB ||
+			ih->type == ICMP_REDIRECT))
+				dsth = (const struct iphdr*)&ih[1];
+	}
+
+	/* in xmit-path use dest port by default and source port only if
+		this is an ICMP reply to something else; vice versa in rcv-path */
+	use_dest_addr = (xmit && dsth == iph) || (!xmit && dsth != iph);
+
+	/* get dst port */
+	if (((u8*)&dsth[1]) <= end && (
+		dsth->protocol == IPPROTO_UDP ||
+		dsth->protocol == IPPROTO_TCP ||
+		dsth->protocol == IPPROTO_SCTP ||
+		dsth->protocol == IPPROTO_DCCP)) {
+			/* for UDP, TCP, SCTP and DCCP source and dest port
+			follow IPv4 header directly */
+			portp = ((u8*)dsth) + dsth->ihl * 4;
+
+			if (use_dest_addr)
+				portp += sizeof(u16);
+	} else if (iph->protocol == IPPROTO_ICMP) {
+		struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
+
+		/* use icmp identifier as port */
+		if (((u8*)&ih) <= end && (
+		    (use_dest_addr && (
+		    ih->type == ICMP_ECHOREPLY ||
+			ih->type == ICMP_TIMESTAMPREPLY ||
+			ih->type == ICMP_INFO_REPLY ||
+			ih->type == ICMP_ADDRESSREPLY)) ||
+			(!use_dest_addr && (
+			ih->type == ICMP_ECHO ||
+			ih->type == ICMP_TIMESTAMP ||
+			ih->type == ICMP_INFO_REQUEST ||
+			ih->type == ICMP_ADDRESS)
+			)))
+				portp = (u8*)&ih->un.echo.id;
+	}
+
+	if ((portp && &portp[2] <= end) || psidlen == 0) {
+		int frombyte = fmr->ip6_prefix_len / 8;
+		int fromrem = fmr->ip6_prefix_len % 8;
+		int bytes = sizeof(struct in6_addr) - frombyte;
+		const u32 *addr = (use_dest_addr) ? &iph->daddr : &iph->saddr;
+		u64 eabits = ((u64)ntohl(*addr)) << (32 + fmr->ip4_prefix_len);
+		u64 t = 0;
+
+		/* extract PSID from port and add it to eabits */
+		u16 psidbits = 0;
+		if (psidlen > 0) {
+			psidbits = ((u16)portp[0]) << 8 | ((u16)portp[1]);
+			psidbits >>= 16 - psidlen - fmr->offset;
+			psidbits = (u16)(psidbits << (16 - psidlen));
+			eabits |= ((u64)psidbits) << (48 - (fmr->ea_len - psidlen));
+		}
+
+		/* rewrite destination address */
+		*dest = fmr->ip6_prefix;
+		memcpy(&dest->s6_addr[10], addr, sizeof(*addr));
+		dest->s6_addr16[7] = htons(psidbits >> (16 - psidlen));
+
+		if (bytes > sizeof(u64))
+			bytes = sizeof(u64);
+
+		/* insert eabits */
+		memcpy(&t, &dest->s6_addr[frombyte], bytes);
+		t = be64_to_cpu(t) & ~(((((u64)1) << fmr->ea_len) - 1)
+			<< (64 - fmr->ea_len - fromrem));
+		t = cpu_to_be64(t | (eabits >> fromrem));
+		memcpy(&dest->s6_addr[frombyte], &t, bytes);
+	}
+}
+
+
 static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 			 const struct tnl_ptk_info *tpi,
 			 struct metadata_dst *tun_dst,
@@ -857,6 +983,27 @@ static int __ip6_tnl_rcv(struct ip6_tnl
 
 	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 
+	if (tpi->proto == htons(ETH_P_IP) && tunnel->parms.fmrs &&
+		!ipv6_addr_equal(&ipv6h->saddr, &tunnel->parms.raddr)) {
+			/* Packet didn't come from BR, so lookup FMR */
+			struct __ip6_tnl_fmr *fmr;
+			struct in6_addr expected = tunnel->parms.raddr;
+			for (fmr = tunnel->parms.fmrs; fmr; fmr = fmr->next)
+				if (ipv6_prefix_equal(&ipv6h->saddr,
+					&fmr->ip6_prefix, fmr->ip6_prefix_len))
+						break;
+
+			/* Check that IPv6 matches IPv4 source to prevent spoofing */
+			if (fmr)
+				ip4ip6_fmr_calc(&expected, ip_hdr(skb),
+						skb_tail_pointer(skb), fmr, false);
+
+			if (!ipv6_addr_equal(&ipv6h->saddr, &expected)) {
+				rcu_read_unlock();
+				goto drop;
+			}
+	}
+
 	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
 
 	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
@@ -1004,6 +1151,7 @@ static void init_tel_txopt(struct ipv6_t
 	opt->ops.opt_nflen = 8;
 }
 
+
 /**
  * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
  *   @t: the outgoing tunnel device
@@ -1284,6 +1432,7 @@ ipxip6_tnl_xmit(struct sk_buff *skb, str
 		u8 protocol)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
+	struct __ip6_tnl_fmr *fmr;
 	struct ipv6hdr *ipv6h;
 	const struct iphdr  *iph;
 	int encap_limit = -1;
@@ -1383,6 +1532,18 @@ ipxip6_tnl_xmit(struct sk_buff *skb, str
 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 	dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield);
 
+	/* try to find matching FMR */
+	for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
+		unsigned mshift = 32 - fmr->ip4_prefix_len;
+		if (ntohl(fmr->ip4_prefix.s_addr) >> mshift ==
+				ntohl(ip_hdr(skb)->daddr) >> mshift)
+			break;
+	}
+
+	/* change dstaddr according to FMR */
+	if (fmr)
+		ip4ip6_fmr_calc(&fl6.daddr, ip_hdr(skb), skb_tail_pointer(skb), fmr, true);
+
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
@@ -1536,6 +1697,14 @@ ip6_tnl_change(struct ip6_tnl *t, const
 	t->parms.link = p->link;
 	t->parms.proto = p->proto;
 	t->parms.fwmark = p->fwmark;
+
+	while (t->parms.fmrs) {
+		struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
+		kfree(t->parms.fmrs);
+		t->parms.fmrs = next;
+	}
+	t->parms.fmrs = p->fmrs;
+
 	dst_cache_reset(&t->dst_cache);
 	ip6_tnl_link_config(t);
 	return 0;
@@ -1574,6 +1743,7 @@ ip6_tnl_parm_from_user(struct __ip6_tnl_
 	p->flowinfo = u->flowinfo;
 	p->link = u->link;
 	p->proto = u->proto;
+	p->fmrs = NULL;
 	memcpy(p->name, u->name, sizeof(u->name));
 }
 
@@ -1960,6 +2130,15 @@ static int ip6_tnl_validate(struct nlatt
 	return 0;
 }
 
+static const struct nla_policy ip6_tnl_fmr_policy[IFLA_IPTUN_FMR_MAX + 1] = {
+	[IFLA_IPTUN_FMR_IP6_PREFIX] = { .len = sizeof(struct in6_addr) },
+	[IFLA_IPTUN_FMR_IP4_PREFIX] = { .len = sizeof(struct in_addr) },
+	[IFLA_IPTUN_FMR_IP6_PREFIX_LEN] = { .type = NLA_U8 },
+	[IFLA_IPTUN_FMR_IP4_PREFIX_LEN] = { .type = NLA_U8 },
+	[IFLA_IPTUN_FMR_EA_LEN] = { .type = NLA_U8 },
+	[IFLA_IPTUN_FMR_OFFSET] = { .type = NLA_U8 }
+};
+
 static void ip6_tnl_netlink_parms(struct nlattr *data[],
 				  struct __ip6_tnl_parm *parms)
 {
@@ -1997,6 +2176,46 @@ static void ip6_tnl_netlink_parms(struct
 
 	if (data[IFLA_IPTUN_FWMARK])
 		parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
+
+	if (data[IFLA_IPTUN_FMRS]) {
+		unsigned rem;
+		struct nlattr *fmr;
+		nla_for_each_nested(fmr, data[IFLA_IPTUN_FMRS], rem) {
+			struct nlattr *fmrd[IFLA_IPTUN_FMR_MAX + 1], *c;
+			struct __ip6_tnl_fmr *nfmr;
+
+			nla_parse_nested(fmrd, IFLA_IPTUN_FMR_MAX,
+				fmr, ip6_tnl_fmr_policy, NULL);
+
+			if (!(nfmr = kzalloc(sizeof(*nfmr), GFP_KERNEL)))
+				continue;
+
+			nfmr->offset = 6;
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX]))
+				nla_memcpy(&nfmr->ip6_prefix, fmrd[IFLA_IPTUN_FMR_IP6_PREFIX],
+					sizeof(nfmr->ip6_prefix));
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX]))
+				nla_memcpy(&nfmr->ip4_prefix, fmrd[IFLA_IPTUN_FMR_IP4_PREFIX],
+					sizeof(nfmr->ip4_prefix));
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX_LEN]))
+				nfmr->ip6_prefix_len = nla_get_u8(c);
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX_LEN]))
+				nfmr->ip4_prefix_len = nla_get_u8(c);
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_EA_LEN]))
+				nfmr->ea_len = nla_get_u8(c);
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_OFFSET]))
+				nfmr->offset = nla_get_u8(c);
+
+			nfmr->next = parms->fmrs;
+			parms->fmrs = nfmr;
+		}
+	}
 }
 
 static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -2112,6 +2331,12 @@ static void ip6_tnl_dellink(struct net_d
 
 static size_t ip6_tnl_get_size(const struct net_device *dev)
 {
+	const struct ip6_tnl *t = netdev_priv(dev);
+	struct __ip6_tnl_fmr *c;
+	int fmrs = 0;
+	for (c = t->parms.fmrs; c; c = c->next)
+		++fmrs;
+
 	return
 		/* IFLA_IPTUN_LINK */
 		nla_total_size(4) +
@@ -2141,6 +2366,24 @@ static size_t ip6_tnl_get_size(const str
 		nla_total_size(0) +
 		/* IFLA_IPTUN_FWMARK */
 		nla_total_size(4) +
+		/* IFLA_IPTUN_FMRS */
+		nla_total_size(0) +
+		(
+			/* nest */
+			nla_total_size(0) +
+			/* IFLA_IPTUN_FMR_IP6_PREFIX */
+			nla_total_size(sizeof(struct in6_addr)) +
+			/* IFLA_IPTUN_FMR_IP4_PREFIX */
+			nla_total_size(sizeof(struct in_addr)) +
+			/* IFLA_IPTUN_FMR_EA_LEN */
+			nla_total_size(1) +
+			/* IFLA_IPTUN_FMR_IP6_PREFIX_LEN */
+			nla_total_size(1) +
+			/* IFLA_IPTUN_FMR_IP4_PREFIX_LEN */
+			nla_total_size(1) +
+			/* IFLA_IPTUN_FMR_OFFSET */
+			nla_total_size(1)
+		) * fmrs +
 		0;
 }
 
@@ -2148,6 +2391,9 @@ static int ip6_tnl_fill_info(struct sk_b
 {
 	struct ip6_tnl *tunnel = netdev_priv(dev);
 	struct __ip6_tnl_parm *parm = &tunnel->parms;
+	struct __ip6_tnl_fmr *c;
+	int fmrcnt = 0;
+	struct nlattr *fmrs;
 
 	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
 	    nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
@@ -2157,9 +2403,27 @@ static int ip6_tnl_fill_info(struct sk_b
 	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
 	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
 	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
-	    nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
+	    nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark) ||
+	    !(fmrs = nla_nest_start(skb, IFLA_IPTUN_FMRS)))
 		goto nla_put_failure;
 
+	for (c = parm->fmrs; c; c = c->next) {
+		struct nlattr *fmr = nla_nest_start(skb, ++fmrcnt);
+		if (!fmr ||
+			nla_put(skb, IFLA_IPTUN_FMR_IP6_PREFIX,
+				sizeof(c->ip6_prefix), &c->ip6_prefix) ||
+			nla_put(skb, IFLA_IPTUN_FMR_IP4_PREFIX,
+				sizeof(c->ip4_prefix), &c->ip4_prefix) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_IP6_PREFIX_LEN, c->ip6_prefix_len) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_IP4_PREFIX_LEN, c->ip4_prefix_len) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_EA_LEN, c->ea_len) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_OFFSET, c->offset))
+				goto nla_put_failure;
+
+		nla_nest_end(skb, fmr);
+	}
+	nla_nest_end(skb, fmrs);
+
 	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
 	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
 	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
@@ -2199,6 +2463,7 @@ static const struct nla_policy ip6_tnl_p
 	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
 	[IFLA_IPTUN_FWMARK]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_FMRS]		= { .type = NLA_NESTED },
 };
 
 static struct rtnl_link_ops ip6_link_ops __read_mostly = {