mirror of
https://github.com/openwrt/openwrt.git
synced 2024-12-20 06:08:08 +00:00
kernel: add a patch to make netfilter conntrack cache routing information
Significantly improves routing / NAT performance Signed-off-by: Felix Fietkau <nbd@openwrt.org> SVN-Revision: 43587
This commit is contained in:
parent
db04bbd2ec
commit
27f36718d3
@ -66,6 +66,7 @@ $(eval $(if $(NF_KMOD),,$(call nf_add,IPT_CORE,CONFIG_NETFILTER_XT_MARK, $(P_XT)
|
|||||||
|
|
||||||
# kernel only
|
# kernel only
|
||||||
$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK, $(P_XT)nf_conntrack),))
|
$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK, $(P_XT)nf_conntrack),))
|
||||||
|
$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK_RTCACHE, $(P_XT)nf_conntrack_rtcache),))
|
||||||
$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_DEFRAG_IPV4, $(P_V4)nf_defrag_ipv4),))
|
$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_DEFRAG_IPV4, $(P_V4)nf_defrag_ipv4),))
|
||||||
$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK_IPV4, $(P_V4)nf_conntrack_ipv4),))
|
$(eval $(if $(NF_KMOD),$(call nf_add,NF_CONNTRACK,CONFIG_NF_CONNTRACK_IPV4, $(P_V4)nf_conntrack_ipv4),))
|
||||||
|
|
||||||
|
@ -0,0 +1,504 @@
|
|||||||
|
Subject: netfilter: conntrack: cache route for forwarded connections
|
||||||
|
|
||||||
|
... to avoid per-packet FIB lookup if possible.
|
||||||
|
|
||||||
|
The cached dst is re-used provided the input interface
|
||||||
|
is the same as that of the previous packet in the same direction.
|
||||||
|
|
||||||
|
If not, the cached dst is invalidated.
|
||||||
|
|
||||||
|
For ipv6 we also need to store sernum, else dst_check doesn't work,
|
||||||
|
pointed out by Eric Dumazet.
|
||||||
|
|
||||||
|
This should speed up forwarding when conntrack is already in use
|
||||||
|
anyway, especially when using reverse path filtering -- active RPF
|
||||||
|
enforces two FIB lookups for each packet.
|
||||||
|
|
||||||
|
Before the routing cache removal this didn't matter since RPF was performed
|
||||||
|
only when route cache didn't yield a result; but without route cache it
|
||||||
|
comes at higher price.
|
||||||
|
|
||||||
|
Julian Anastasov suggested to add NETDEV_UNREGISTER handler to
|
||||||
|
avoid holding on to dsts of 'frozen' conntracks.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_conntrack_extend.h
|
||||||
|
+++ b/include/net/netfilter/nf_conntrack_extend.h
|
||||||
|
@@ -30,6 +30,9 @@ enum nf_ct_ext_id {
|
||||||
|
#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
|
||||||
|
NF_CT_EXT_SYNPROXY,
|
||||||
|
#endif
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
|
||||||
|
+ NF_CT_EXT_RTCACHE,
|
||||||
|
+#endif
|
||||||
|
NF_CT_EXT_NUM,
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -43,6 +46,7 @@ enum nf_ct_ext_id {
|
||||||
|
#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
|
||||||
|
#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
|
||||||
|
#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
|
||||||
|
+#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache
|
||||||
|
|
||||||
|
/* Extensions: optional stuff which isn't permanently in struct. */
|
||||||
|
struct nf_ct_ext {
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/include/net/netfilter/nf_conntrack_rtcache.h
|
||||||
|
@@ -0,0 +1,34 @@
|
||||||
|
+#include <linux/gfp.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_extend.h>
|
||||||
|
+
|
||||||
|
+struct dst_entry;
|
||||||
|
+
|
||||||
|
+struct nf_conn_dst_cache {
|
||||||
|
+ struct dst_entry *dst;
|
||||||
|
+ int iif;
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ u32 cookie;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct nf_conn_rtcache {
|
||||||
|
+ struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX];
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static inline
|
||||||
|
+struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
|
||||||
|
+ return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE);
|
||||||
|
+#else
|
||||||
|
+ return NULL;
|
||||||
|
+#endif
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ return rtc->cached_dst[dir].iif;
|
||||||
|
+}
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -103,6 +103,18 @@ config NF_CONNTRACK_EVENTS
|
||||||
|
|
||||||
|
If unsure, say `N'.
|
||||||
|
|
||||||
|
+config NF_CONNTRACK_RTCACHE
|
||||||
|
+ tristate "Cache route entries in conntrack objects"
|
||||||
|
+ depends on NETFILTER_ADVANCED
|
||||||
|
+ depends on NF_CONNTRACK
|
||||||
|
+ help
|
||||||
|
+ If this option is enabled, the connection tracking code will
|
||||||
|
+ cache routing information for each connection that is being
|
||||||
|
+ forwarded, at a cost of 32 bytes per conntrack object.
|
||||||
|
+
|
||||||
|
+ To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
+ The module will be called nf_conntrack_rtcache.
|
||||||
|
+
|
||||||
|
config NF_CONNTRACK_TIMEOUT
|
||||||
|
bool 'Connection tracking timeout'
|
||||||
|
depends on NETFILTER_ADVANCED
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -18,6 +18,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n
|
||||||
|
# connection tracking
|
||||||
|
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
|
||||||
|
|
||||||
|
+# optional conntrack route cache extension
|
||||||
|
+obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o
|
||||||
|
+
|
||||||
|
# SCTP protocol connection tracking
|
||||||
|
obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
|
||||||
|
obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nf_conntrack_rtcache.c
|
||||||
|
@@ -0,0 +1,386 @@
|
||||||
|
+/* route cache for netfilter.
|
||||||
|
+ *
|
||||||
|
+ * (C) 2014 Red Hat GmbH
|
||||||
|
+ *
|
||||||
|
+ * This program is free software; you can redistribute it and/or modify
|
||||||
|
+ * it under the terms of the GNU General Public License version 2 as
|
||||||
|
+ * published by the Free Software Foundation.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||||
|
+
|
||||||
|
+#include <linux/types.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/skbuff.h>
|
||||||
|
+#include <linux/stddef.h>
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <linux/export.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+
|
||||||
|
+#include <net/dst.h>
|
||||||
|
+
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_extend.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_rtcache.h>
|
||||||
|
+
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+#include <net/ip6_fib.h>
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *dst = rtc->cached_dst[dir].dst;
|
||||||
|
+
|
||||||
|
+ dst_release(dst);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_destroy(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL);
|
||||||
|
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_ct_rtcache_ext_add(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC);
|
||||||
|
+ if (rtc) {
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ if (nf_ct_is_untracked(ct))
|
||||||
|
+ return NULL;
|
||||||
|
+ return nf_ct_rtcache_find(ct);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct dst_entry *
|
||||||
|
+nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ return rtc->cached_dst[dir].dst;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst)
|
||||||
|
+{
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ if (pf == NFPROTO_IPV6) {
|
||||||
|
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
|
||||||
|
+
|
||||||
|
+ if (rt->rt6i_node)
|
||||||
|
+ return (u32)rt->rt6i_node->fn_sernum;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_dst_set(int pf,
|
||||||
|
+ struct nf_conn_rtcache *rtc,
|
||||||
|
+ struct dst_entry *dst,
|
||||||
|
+ enum ip_conntrack_dir dir, int iif)
|
||||||
|
+{
|
||||||
|
+ if (rtc->cached_dst[dir].iif != iif)
|
||||||
|
+ rtc->cached_dst[dir].iif = iif;
|
||||||
|
+
|
||||||
|
+ if (rtc->cached_dst[dir].dst != dst) {
|
||||||
|
+ struct dst_entry *old;
|
||||||
|
+
|
||||||
|
+ dst_hold(dst);
|
||||||
|
+
|
||||||
|
+ old = xchg(&rtc->cached_dst[dir].dst, dst);
|
||||||
|
+ dst_release(old);
|
||||||
|
+
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ if (pf == NFPROTO_IPV6)
|
||||||
|
+ rtc->cached_dst[dir].cookie =
|
||||||
|
+ nf_rtcache_get_cookie(pf, dst);
|
||||||
|
+#endif
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *old;
|
||||||
|
+
|
||||||
|
+ pr_debug("Invalidate iif %d for dir %d on cache %p\n",
|
||||||
|
+ rtc->cached_dst[dir].iif, dir, rtc);
|
||||||
|
+
|
||||||
|
+ old = xchg(&rtc->cached_dst[dir].dst, NULL);
|
||||||
|
+ dst_release(old);
|
||||||
|
+ rtc->cached_dst[dir].iif = -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_in(const struct nf_hook_ops *ops,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct net_device *in,
|
||||||
|
+ const struct net_device *out,
|
||||||
|
+ int (*okfn)(struct sk_buff *))
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+ enum ip_conntrack_info ctinfo;
|
||||||
|
+ enum ip_conntrack_dir dir;
|
||||||
|
+ struct dst_entry *dst;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ int iif;
|
||||||
|
+ u32 cookie;
|
||||||
|
+
|
||||||
|
+ if (skb_dst(skb) || skb->sk)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ ct = nf_ct_get(skb, &ctinfo);
|
||||||
|
+ if (!ct)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_rtcache_find_usable(ct);
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ /* if iif changes, don't use cache and let ip stack
|
||||||
|
+ * do route lookup.
|
||||||
|
+ *
|
||||||
|
+ * If rp_filter is enabled it might toss skb, so
|
||||||
|
+ * we don't want to avoid these checks.
|
||||||
|
+ */
|
||||||
|
+ dir = CTINFO2DIR(ctinfo);
|
||||||
|
+ iif = nf_conn_rtcache_iif_get(rtc, dir);
|
||||||
|
+ if (in->ifindex != iif) {
|
||||||
|
+ pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n",
|
||||||
|
+ ct, iif, in->ifindex);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+ dst = nf_conn_rtcache_dst_get(rtc, dir);
|
||||||
|
+ if (dst == NULL)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ cookie = nf_rtcache_get_cookie(ops->pf, dst);
|
||||||
|
+
|
||||||
|
+ dst = dst_check(dst, cookie);
|
||||||
|
+ pr_debug("obtained dst %p for skb %p, cookie %d\n", dst, skb, cookie);
|
||||||
|
+ if (likely(dst))
|
||||||
|
+ skb_dst_set_noref_force(skb, dst);
|
||||||
|
+ else
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, dir);
|
||||||
|
+
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_forward(const struct nf_hook_ops *ops,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct net_device *in,
|
||||||
|
+ const struct net_device *out,
|
||||||
|
+ int (*okfn)(struct sk_buff *))
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+ enum ip_conntrack_info ctinfo;
|
||||||
|
+ enum ip_conntrack_dir dir;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ int iif;
|
||||||
|
+
|
||||||
|
+ ct = nf_ct_get(skb, &ctinfo);
|
||||||
|
+ if (!ct)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (!nf_ct_is_confirmed(ct)) {
|
||||||
|
+ if (WARN_ON(nf_ct_rtcache_find(ct)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ nf_ct_rtcache_ext_add(ct);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_rtcache_find_usable(ct);
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ dir = CTINFO2DIR(ctinfo);
|
||||||
|
+ iif = nf_conn_rtcache_iif_get(rtc, dir);
|
||||||
|
+ pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n",
|
||||||
|
+ ct, skb, dir, iif, in->ifindex);
|
||||||
|
+ if (likely(in->ifindex == iif))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ nf_conn_rtcache_dst_set(ops->pf, rtc, skb_dst(skb), dir, in->ifindex);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+ struct net_device *dev = data;
|
||||||
|
+
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif ||
|
||||||
|
+ dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) {
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL);
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_netdev_event(struct notifier_block *this,
|
||||||
|
+ unsigned long event, void *ptr)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
+ struct net *net = dev_net(dev);
|
||||||
|
+
|
||||||
|
+ if (event == NETDEV_DOWN)
|
||||||
|
+ nf_ct_iterate_cleanup(net, nf_rtcache_dst_remove, dev, 0, 0);
|
||||||
|
+
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct notifier_block nf_rtcache_notifier = {
|
||||||
|
+ .notifier_call = nf_rtcache_netdev_event,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct nf_hook_ops rtcache_ops[] = {
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_in,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV4,
|
||||||
|
+ .hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_forward,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV4,
|
||||||
|
+ .hooknum = NF_INET_FORWARD,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_in,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV6,
|
||||||
|
+ .hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_forward,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV6,
|
||||||
|
+ .hooknum = NF_INET_FORWARD,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+#endif
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct nf_ct_ext_type rtcache_extend __read_mostly = {
|
||||||
|
+ .len = sizeof(struct nf_conn_rtcache),
|
||||||
|
+ .align = __alignof__(struct nf_conn_rtcache),
|
||||||
|
+ .id = NF_CT_EXT_RTCACHE,
|
||||||
|
+ .destroy = nf_conn_rtcache_destroy,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_conntrack_rtcache_init(void)
|
||||||
|
+{
|
||||||
|
+ int ret = nf_ct_extend_register(&rtcache_extend);
|
||||||
|
+
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ pr_err("nf_conntrack_rtcache: Unable to register extension\n");
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = nf_register_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = register_netdevice_notifier(&nf_rtcache_notifier);
|
||||||
|
+ if (ret) {
|
||||||
|
+ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+
|
||||||
|
+ return rtc != NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net)
|
||||||
|
+{
|
||||||
|
+ bool wait = false;
|
||||||
|
+ int cpu;
|
||||||
|
+
|
||||||
|
+ for_each_possible_cpu(cpu) {
|
||||||
|
+ struct nf_conntrack_tuple_hash *h;
|
||||||
|
+ struct hlist_nulls_node *n;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+
|
||||||
|
+ rcu_read_lock();
|
||||||
|
+ spin_lock_bh(&nf_conntrack_lock);
|
||||||
|
+
|
||||||
|
+ hlist_nulls_for_each_entry(h, n, &net->ct.dying, hnnode) {
|
||||||
|
+ ct = nf_ct_tuplehash_to_ctrack(h);
|
||||||
|
+ if (nf_ct_rtcache_find(ct) != NULL) {
|
||||||
|
+ wait = true;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ spin_unlock_bh(&nf_conntrack_lock);
|
||||||
|
+ rcu_read_unlock();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return wait;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_conntrack_rtcache_fini(void)
|
||||||
|
+{
|
||||||
|
+ struct net *net;
|
||||||
|
+ int count = 0;
|
||||||
|
+
|
||||||
|
+ /* remove hooks so no new connections get rtcache extension */
|
||||||
|
+ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
|
||||||
|
+
|
||||||
|
+ synchronize_net();
|
||||||
|
+
|
||||||
|
+ unregister_netdevice_notifier(&nf_rtcache_notifier);
|
||||||
|
+
|
||||||
|
+ rtnl_lock();
|
||||||
|
+
|
||||||
|
+ /* zap all conntracks with rtcache extension */
|
||||||
|
+ for_each_net(net)
|
||||||
|
+ nf_ct_iterate_cleanup(net, nf_rtcache_ext_remove, NULL, 0, 0);
|
||||||
|
+
|
||||||
|
+ for_each_net(net) {
|
||||||
|
+ /* .. and make sure they're gone from dying list, too */
|
||||||
|
+ while (nf_conntrack_rtcache_wait_for_dying(net)) {
|
||||||
|
+ msleep(200);
|
||||||
|
+ WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n");
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rtnl_unlock();
|
||||||
|
+ synchronize_net();
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+}
|
||||||
|
+module_init(nf_conntrack_rtcache_init);
|
||||||
|
+module_exit(nf_conntrack_rtcache_fini);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
|
||||||
|
+MODULE_DESCRIPTION("Conntrack route cache extension");
|
@ -1,6 +1,6 @@
|
|||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -195,7 +195,6 @@ config NF_CONNTRACK_FTP
|
@@ -207,7 +207,6 @@ config NF_CONNTRACK_FTP
|
||||||
|
|
||||||
config NF_CONNTRACK_H323
|
config NF_CONNTRACK_H323
|
||||||
tristate "H.323 protocol support"
|
tristate "H.323 protocol support"
|
||||||
@ -8,7 +8,7 @@
|
|||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
help
|
help
|
||||||
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
|
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
|
||||||
@@ -871,7 +870,6 @@ config NETFILTER_XT_TARGET_SECMARK
|
@@ -883,7 +882,6 @@ config NETFILTER_XT_TARGET_SECMARK
|
||||||
|
|
||||||
config NETFILTER_XT_TARGET_TCPMSS
|
config NETFILTER_XT_TARGET_TCPMSS
|
||||||
tristate '"TCPMSS" target support'
|
tristate '"TCPMSS" target support'
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -1345,6 +1345,27 @@ config NETFILTER_XT_MATCH_STATE
|
@@ -1357,6 +1357,27 @@ config NETFILTER_XT_MATCH_STATE
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
@ -30,7 +30,7 @@
|
|||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
--- a/net/netfilter/Makefile
|
--- a/net/netfilter/Makefile
|
||||||
+++ b/net/netfilter/Makefile
|
+++ b/net/netfilter/Makefile
|
||||||
@@ -158,6 +158,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT)
|
@@ -161,6 +161,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT)
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -1151,6 +1151,27 @@ config NETFILTER_XT_MATCH_L2TP
|
@@ -1163,6 +1163,27 @@ config NETFILTER_XT_MATCH_L2TP
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
@ -28,7 +28,7 @@
|
|||||||
config NETFILTER_XT_MATCH_LENGTH
|
config NETFILTER_XT_MATCH_LENGTH
|
||||||
tristate '"length" match support'
|
tristate '"length" match support'
|
||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
@@ -1345,26 +1366,11 @@ config NETFILTER_XT_MATCH_STATE
|
@@ -1357,26 +1378,11 @@ config NETFILTER_XT_MATCH_STATE
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
+#endif /* XT_ID_H */
|
+#endif /* XT_ID_H */
|
||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -1121,6 +1121,13 @@ config NETFILTER_XT_MATCH_IPCOMP
|
@@ -1133,6 +1133,13 @@ config NETFILTER_XT_MATCH_IPCOMP
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
@ -37,7 +37,7 @@
|
|||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
--- a/net/netfilter/Makefile
|
--- a/net/netfilter/Makefile
|
||||||
+++ b/net/netfilter/Makefile
|
+++ b/net/netfilter/Makefile
|
||||||
@@ -136,6 +136,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) +=
|
@@ -139,6 +139,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) +=
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
|
||||||
|
@ -0,0 +1,505 @@
|
|||||||
|
Subject: netfilter: conntrack: cache route for forwarded connections
|
||||||
|
|
||||||
|
... to avoid per-packet FIB lookup if possible.
|
||||||
|
|
||||||
|
The cached dst is re-used provided the input interface
|
||||||
|
is the same as that of the previous packet in the same direction.
|
||||||
|
|
||||||
|
If not, the cached dst is invalidated.
|
||||||
|
|
||||||
|
For ipv6 we also need to store sernum, else dst_check doesn't work,
|
||||||
|
pointed out by Eric Dumazet.
|
||||||
|
|
||||||
|
This should speed up forwarding when conntrack is already in use
|
||||||
|
anyway, especially when using reverse path filtering -- active RPF
|
||||||
|
enforces two FIB lookups for each packet.
|
||||||
|
|
||||||
|
Before the routing cache removal this didn't matter since RPF was performed
|
||||||
|
only when route cache didn't yield a result; but without route cache it
|
||||||
|
comes at higher price.
|
||||||
|
|
||||||
|
Julian Anastasov suggested to add NETDEV_UNREGISTER handler to
|
||||||
|
avoid holding on to dsts of 'frozen' conntracks.
|
||||||
|
|
||||||
|
Signed-off-by: Florian Westphal <fw@strlen.de>
|
||||||
|
|
||||||
|
--- a/include/net/netfilter/nf_conntrack_extend.h
|
||||||
|
+++ b/include/net/netfilter/nf_conntrack_extend.h
|
||||||
|
@@ -30,6 +30,9 @@ enum nf_ct_ext_id {
|
||||||
|
#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
|
||||||
|
NF_CT_EXT_SYNPROXY,
|
||||||
|
#endif
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
|
||||||
|
+ NF_CT_EXT_RTCACHE,
|
||||||
|
+#endif
|
||||||
|
NF_CT_EXT_NUM,
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -43,6 +46,7 @@ enum nf_ct_ext_id {
|
||||||
|
#define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
|
||||||
|
#define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
|
||||||
|
#define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
|
||||||
|
+#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache
|
||||||
|
|
||||||
|
/* Extensions: optional stuff which isn't permanently in struct. */
|
||||||
|
struct nf_ct_ext {
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/include/net/netfilter/nf_conntrack_rtcache.h
|
||||||
|
@@ -0,0 +1,34 @@
|
||||||
|
+#include <linux/gfp.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_extend.h>
|
||||||
|
+
|
||||||
|
+struct dst_entry;
|
||||||
|
+
|
||||||
|
+struct nf_conn_dst_cache {
|
||||||
|
+ struct dst_entry *dst;
|
||||||
|
+ int iif;
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ u32 cookie;
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct nf_conn_rtcache {
|
||||||
|
+ struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX];
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static inline
|
||||||
|
+struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
|
||||||
|
+ return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE);
|
||||||
|
+#else
|
||||||
|
+ return NULL;
|
||||||
|
+#endif
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ return rtc->cached_dst[dir].iif;
|
||||||
|
+}
|
||||||
|
--- a/net/netfilter/Kconfig
|
||||||
|
+++ b/net/netfilter/Kconfig
|
||||||
|
@@ -106,6 +106,18 @@ config NF_CONNTRACK_EVENTS
|
||||||
|
|
||||||
|
If unsure, say `N'.
|
||||||
|
|
||||||
|
+config NF_CONNTRACK_RTCACHE
|
||||||
|
+ tristate "Cache route entries in conntrack objects"
|
||||||
|
+ depends on NETFILTER_ADVANCED
|
||||||
|
+ depends on NF_CONNTRACK
|
||||||
|
+ help
|
||||||
|
+ If this option is enabled, the connection tracking code will
|
||||||
|
+ cache routing information for each connection that is being
|
||||||
|
+ forwarded, at a cost of 32 bytes per conntrack object.
|
||||||
|
+
|
||||||
|
+ To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
+ The module will be called nf_conntrack_rtcache.
|
||||||
|
+
|
||||||
|
config NF_CONNTRACK_TIMEOUT
|
||||||
|
bool 'Connection tracking timeout'
|
||||||
|
depends on NETFILTER_ADVANCED
|
||||||
|
--- a/net/netfilter/Makefile
|
||||||
|
+++ b/net/netfilter/Makefile
|
||||||
|
@@ -18,6 +18,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n
|
||||||
|
# connection tracking
|
||||||
|
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
|
||||||
|
|
||||||
|
+# optional conntrack route cache extension
|
||||||
|
+obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o
|
||||||
|
+
|
||||||
|
# SCTP protocol connection tracking
|
||||||
|
obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
|
||||||
|
obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/net/netfilter/nf_conntrack_rtcache.c
|
||||||
|
@@ -0,0 +1,387 @@
|
||||||
|
+/* route cache for netfilter.
|
||||||
|
+ *
|
||||||
|
+ * (C) 2014 Red Hat GmbH
|
||||||
|
+ *
|
||||||
|
+ * This program is free software; you can redistribute it and/or modify
|
||||||
|
+ * it under the terms of the GNU General Public License version 2 as
|
||||||
|
+ * published by the Free Software Foundation.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||||
|
+
|
||||||
|
+#include <linux/types.h>
|
||||||
|
+#include <linux/netfilter.h>
|
||||||
|
+#include <linux/skbuff.h>
|
||||||
|
+#include <linux/stddef.h>
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/netdevice.h>
|
||||||
|
+#include <linux/export.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+
|
||||||
|
+#include <net/dst.h>
|
||||||
|
+
|
||||||
|
+#include <net/netfilter/nf_conntrack.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_core.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_extend.h>
|
||||||
|
+#include <net/netfilter/nf_conntrack_rtcache.h>
|
||||||
|
+
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+#include <net/ip6_fib.h>
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *dst = rtc->cached_dst[dir].dst;
|
||||||
|
+
|
||||||
|
+ dst_release(dst);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_destroy(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL);
|
||||||
|
+ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_ct_rtcache_ext_add(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC);
|
||||||
|
+ if (rtc) {
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1;
|
||||||
|
+ rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct)
|
||||||
|
+{
|
||||||
|
+ if (nf_ct_is_untracked(ct))
|
||||||
|
+ return NULL;
|
||||||
|
+ return nf_ct_rtcache_find(ct);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct dst_entry *
|
||||||
|
+nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ return rtc->cached_dst[dir].dst;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst)
|
||||||
|
+{
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ if (pf == NFPROTO_IPV6) {
|
||||||
|
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
|
||||||
|
+
|
||||||
|
+ if (rt->rt6i_node)
|
||||||
|
+ return (u32)rt->rt6i_node->fn_sernum;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_dst_set(int pf,
|
||||||
|
+ struct nf_conn_rtcache *rtc,
|
||||||
|
+ struct dst_entry *dst,
|
||||||
|
+ enum ip_conntrack_dir dir, int iif)
|
||||||
|
+{
|
||||||
|
+ if (rtc->cached_dst[dir].iif != iif)
|
||||||
|
+ rtc->cached_dst[dir].iif = iif;
|
||||||
|
+
|
||||||
|
+ if (rtc->cached_dst[dir].dst != dst) {
|
||||||
|
+ struct dst_entry *old;
|
||||||
|
+
|
||||||
|
+ dst_hold(dst);
|
||||||
|
+
|
||||||
|
+ old = xchg(&rtc->cached_dst[dir].dst, dst);
|
||||||
|
+ dst_release(old);
|
||||||
|
+
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ if (pf == NFPROTO_IPV6)
|
||||||
|
+ rtc->cached_dst[dir].cookie =
|
||||||
|
+ nf_rtcache_get_cookie(pf, dst);
|
||||||
|
+#endif
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc,
|
||||||
|
+ enum ip_conntrack_dir dir)
|
||||||
|
+{
|
||||||
|
+ struct dst_entry *old;
|
||||||
|
+
|
||||||
|
+ pr_debug("Invalidate iif %d for dir %d on cache %p\n",
|
||||||
|
+ rtc->cached_dst[dir].iif, dir, rtc);
|
||||||
|
+
|
||||||
|
+ old = xchg(&rtc->cached_dst[dir].dst, NULL);
|
||||||
|
+ dst_release(old);
|
||||||
|
+ rtc->cached_dst[dir].iif = -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_in(const struct nf_hook_ops *ops,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct net_device *in,
|
||||||
|
+ const struct net_device *out,
|
||||||
|
+ int (*okfn)(struct sk_buff *))
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+ enum ip_conntrack_info ctinfo;
|
||||||
|
+ enum ip_conntrack_dir dir;
|
||||||
|
+ struct dst_entry *dst;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ int iif;
|
||||||
|
+ u32 cookie;
|
||||||
|
+
|
||||||
|
+ if (skb_dst(skb) || skb->sk)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ ct = nf_ct_get(skb, &ctinfo);
|
||||||
|
+ if (!ct)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_rtcache_find_usable(ct);
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ /* if iif changes, don't use cache and let ip stack
|
||||||
|
+ * do route lookup.
|
||||||
|
+ *
|
||||||
|
+ * If rp_filter is enabled it might toss skb, so
|
||||||
|
+ * we don't want to avoid these checks.
|
||||||
|
+ */
|
||||||
|
+ dir = CTINFO2DIR(ctinfo);
|
||||||
|
+ iif = nf_conn_rtcache_iif_get(rtc, dir);
|
||||||
|
+ if (in->ifindex != iif) {
|
||||||
|
+ pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n",
|
||||||
|
+ ct, iif, in->ifindex);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+ dst = nf_conn_rtcache_dst_get(rtc, dir);
|
||||||
|
+ if (dst == NULL)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ cookie = nf_rtcache_get_cookie(ops->pf, dst);
|
||||||
|
+
|
||||||
|
+ dst = dst_check(dst, cookie);
|
||||||
|
+ pr_debug("obtained dst %p for skb %p, cookie %d\n", dst, skb, cookie);
|
||||||
|
+ if (likely(dst))
|
||||||
|
+ skb_dst_set_noref_force(skb, dst);
|
||||||
|
+ else
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, dir);
|
||||||
|
+
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static unsigned int nf_rtcache_forward(const struct nf_hook_ops *ops,
|
||||||
|
+ struct sk_buff *skb,
|
||||||
|
+ const struct net_device *in,
|
||||||
|
+ const struct net_device *out,
|
||||||
|
+ int (*okfn)(struct sk_buff *))
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc;
|
||||||
|
+ enum ip_conntrack_info ctinfo;
|
||||||
|
+ enum ip_conntrack_dir dir;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ int iif;
|
||||||
|
+
|
||||||
|
+ ct = nf_ct_get(skb, &ctinfo);
|
||||||
|
+ if (!ct)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ if (!nf_ct_is_confirmed(ct)) {
|
||||||
|
+ if (WARN_ON(nf_ct_rtcache_find(ct)))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ nf_ct_rtcache_ext_add(ct);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rtc = nf_ct_rtcache_find_usable(ct);
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ dir = CTINFO2DIR(ctinfo);
|
||||||
|
+ iif = nf_conn_rtcache_iif_get(rtc, dir);
|
||||||
|
+ pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n",
|
||||||
|
+ ct, skb, dir, iif, in->ifindex);
|
||||||
|
+ if (likely(in->ifindex == iif))
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+
|
||||||
|
+ nf_conn_rtcache_dst_set(ops->pf, rtc, skb_dst(skb), dir, in->ifindex);
|
||||||
|
+ return NF_ACCEPT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+ struct net_device *dev = data;
|
||||||
|
+
|
||||||
|
+ if (!rtc)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif ||
|
||||||
|
+ dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) {
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL);
|
||||||
|
+ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_netdev_event(struct notifier_block *this,
|
||||||
|
+ unsigned long event, void *ptr)
|
||||||
|
+{
|
||||||
|
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
|
||||||
|
+ struct net *net = dev_net(dev);
|
||||||
|
+
|
||||||
|
+ if (event == NETDEV_DOWN)
|
||||||
|
+ nf_ct_iterate_cleanup(net, nf_rtcache_dst_remove, dev, 0, 0);
|
||||||
|
+
|
||||||
|
+ return NOTIFY_DONE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct notifier_block nf_rtcache_notifier = {
|
||||||
|
+ .notifier_call = nf_rtcache_netdev_event,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct nf_hook_ops rtcache_ops[] = {
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_in,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV4,
|
||||||
|
+ .hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_forward,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV4,
|
||||||
|
+ .hooknum = NF_INET_FORWARD,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_in,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV6,
|
||||||
|
+ .hooknum = NF_INET_PRE_ROUTING,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+ {
|
||||||
|
+ .hook = nf_rtcache_forward,
|
||||||
|
+ .owner = THIS_MODULE,
|
||||||
|
+ .pf = NFPROTO_IPV6,
|
||||||
|
+ .hooknum = NF_INET_FORWARD,
|
||||||
|
+ .priority = NF_IP_PRI_LAST,
|
||||||
|
+ },
|
||||||
|
+#endif
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct nf_ct_ext_type rtcache_extend __read_mostly = {
|
||||||
|
+ .len = sizeof(struct nf_conn_rtcache),
|
||||||
|
+ .align = __alignof__(struct nf_conn_rtcache),
|
||||||
|
+ .id = NF_CT_EXT_RTCACHE,
|
||||||
|
+ .destroy = nf_conn_rtcache_destroy,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int __init nf_conntrack_rtcache_init(void)
|
||||||
|
+{
|
||||||
|
+ int ret = nf_ct_extend_register(&rtcache_extend);
|
||||||
|
+
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ pr_err("nf_conntrack_rtcache: Unable to register extension\n");
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = nf_register_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = register_netdevice_notifier(&nf_rtcache_notifier);
|
||||||
|
+ if (ret) {
|
||||||
|
+ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data)
|
||||||
|
+{
|
||||||
|
+ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
|
||||||
|
+
|
||||||
|
+ return rtc != NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net)
|
||||||
|
+{
|
||||||
|
+ bool wait = false;
|
||||||
|
+ int cpu;
|
||||||
|
+
|
||||||
|
+ for_each_possible_cpu(cpu) {
|
||||||
|
+ struct nf_conntrack_tuple_hash *h;
|
||||||
|
+ struct hlist_nulls_node *n;
|
||||||
|
+ struct nf_conn *ct;
|
||||||
|
+ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
|
||||||
|
+
|
||||||
|
+ rcu_read_lock();
|
||||||
|
+ spin_lock_bh(&pcpu->lock);
|
||||||
|
+
|
||||||
|
+ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
|
||||||
|
+ ct = nf_ct_tuplehash_to_ctrack(h);
|
||||||
|
+ if (nf_ct_rtcache_find(ct) != NULL) {
|
||||||
|
+ wait = true;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ spin_unlock_bh(&pcpu->lock);
|
||||||
|
+ rcu_read_unlock();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return wait;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void __exit nf_conntrack_rtcache_fini(void)
|
||||||
|
+{
|
||||||
|
+ struct net *net;
|
||||||
|
+ int count = 0;
|
||||||
|
+
|
||||||
|
+ /* remove hooks so no new connections get rtcache extension */
|
||||||
|
+ nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
|
||||||
|
+
|
||||||
|
+ synchronize_net();
|
||||||
|
+
|
||||||
|
+ unregister_netdevice_notifier(&nf_rtcache_notifier);
|
||||||
|
+
|
||||||
|
+ rtnl_lock();
|
||||||
|
+
|
||||||
|
+ /* zap all conntracks with rtcache extension */
|
||||||
|
+ for_each_net(net)
|
||||||
|
+ nf_ct_iterate_cleanup(net, nf_rtcache_ext_remove, NULL, 0, 0);
|
||||||
|
+
|
||||||
|
+ for_each_net(net) {
|
||||||
|
+ /* .. and make sure they're gone from dying list, too */
|
||||||
|
+ while (nf_conntrack_rtcache_wait_for_dying(net)) {
|
||||||
|
+ msleep(200);
|
||||||
|
+ WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n");
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rtnl_unlock();
|
||||||
|
+ synchronize_net();
|
||||||
|
+ nf_ct_extend_unregister(&rtcache_extend);
|
||||||
|
+}
|
||||||
|
+module_init(nf_conntrack_rtcache_init);
|
||||||
|
+module_exit(nf_conntrack_rtcache_fini);
|
||||||
|
+
|
||||||
|
+MODULE_LICENSE("GPL");
|
||||||
|
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
|
||||||
|
+MODULE_DESCRIPTION("Conntrack route cache extension");
|
@ -1,6 +1,6 @@
|
|||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -198,7 +198,6 @@ config NF_CONNTRACK_FTP
|
@@ -210,7 +210,6 @@ config NF_CONNTRACK_FTP
|
||||||
|
|
||||||
config NF_CONNTRACK_H323
|
config NF_CONNTRACK_H323
|
||||||
tristate "H.323 protocol support"
|
tristate "H.323 protocol support"
|
||||||
@ -8,7 +8,7 @@
|
|||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
help
|
help
|
||||||
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
|
H.323 is a VoIP signalling protocol from ITU-T. As one of the most
|
||||||
@@ -895,7 +894,6 @@ config NETFILTER_XT_TARGET_SECMARK
|
@@ -907,7 +906,6 @@ config NETFILTER_XT_TARGET_SECMARK
|
||||||
|
|
||||||
config NETFILTER_XT_TARGET_TCPMSS
|
config NETFILTER_XT_TARGET_TCPMSS
|
||||||
tristate '"TCPMSS" target support'
|
tristate '"TCPMSS" target support'
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -1369,6 +1369,27 @@ config NETFILTER_XT_MATCH_STATE
|
@@ -1381,6 +1381,27 @@ config NETFILTER_XT_MATCH_STATE
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
@ -30,7 +30,7 @@
|
|||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
--- a/net/netfilter/Makefile
|
--- a/net/netfilter/Makefile
|
||||||
+++ b/net/netfilter/Makefile
|
+++ b/net/netfilter/Makefile
|
||||||
@@ -162,6 +162,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT)
|
@@ -165,6 +165,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT)
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -1175,6 +1175,27 @@ config NETFILTER_XT_MATCH_L2TP
|
@@ -1187,6 +1187,27 @@ config NETFILTER_XT_MATCH_L2TP
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
@ -28,7 +28,7 @@
|
|||||||
config NETFILTER_XT_MATCH_LENGTH
|
config NETFILTER_XT_MATCH_LENGTH
|
||||||
tristate '"length" match support'
|
tristate '"length" match support'
|
||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
@@ -1369,26 +1390,11 @@ config NETFILTER_XT_MATCH_STATE
|
@@ -1381,26 +1402,11 @@ config NETFILTER_XT_MATCH_STATE
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
+#endif /* XT_ID_H */
|
+#endif /* XT_ID_H */
|
||||||
--- a/net/netfilter/Kconfig
|
--- a/net/netfilter/Kconfig
|
||||||
+++ b/net/netfilter/Kconfig
|
+++ b/net/netfilter/Kconfig
|
||||||
@@ -1145,6 +1145,13 @@ config NETFILTER_XT_MATCH_IPCOMP
|
@@ -1157,6 +1157,13 @@ config NETFILTER_XT_MATCH_IPCOMP
|
||||||
|
|
||||||
To compile it as a module, choose M here. If unsure, say N.
|
To compile it as a module, choose M here. If unsure, say N.
|
||||||
|
|
||||||
@ -37,7 +37,7 @@
|
|||||||
depends on NETFILTER_ADVANCED
|
depends on NETFILTER_ADVANCED
|
||||||
--- a/net/netfilter/Makefile
|
--- a/net/netfilter/Makefile
|
||||||
+++ b/net/netfilter/Makefile
|
+++ b/net/netfilter/Makefile
|
||||||
@@ -140,6 +140,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) +=
|
@@ -143,6 +143,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) +=
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
|
||||||
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
|
obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
|
||||||
|
Loading…
Reference in New Issue
Block a user