iproute2: cake: support new operating modes for 17.01

There has been significant activity on the cake qdisc of late.
Some of that effort is related to upstreaming to kernel & iproute2
mainline, but we're not quite there yet.  This commit teaches tc how to
activate and interpret the latest cake operating modes, namely:

ingress mode: Instead of only counting packets that make it past the
shaper, include packets we've decided to drop as well, since they did
arrive with us on the link and took link capacity.
This mode is more suitable for shaping the ingress of a link
(e.g. from ISP) rather than the more normal egress.

ack-filter/ack-filter-aggressive: Filter excessive TCP ACKs.  Useful on
highly asymmetric links (downstream vs. upstream capacity) where the
majority of upstream link capacity is occupied with ACKs for downstream
traffic.

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
This commit is contained in:
Kevin Darbyshire-Bryant 2017-12-22 13:45:11 +00:00 committed by John Crispin
parent 4f1dca9eca
commit c4e9487cf5
2 changed files with 118 additions and 38 deletions

View File

@ -9,7 +9,7 @@ include $(TOPDIR)/rules.mk
PKG_NAME:=iproute2
PKG_VERSION:=4.4.0
PKG_RELEASE:=9
PKG_RELEASE:=10
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.xz
PKG_SOURCE_URL:=@KERNEL/linux/utils/net/iproute2

View File

@ -49,9 +49,9 @@ index 8d2530d..c55a9a8 100644
+ __u32 way_indirect_hits[TC_CAKE_MAX_TINS];
+ __u32 way_misses [TC_CAKE_MAX_TINS];
+ __u32 way_collisions [TC_CAKE_MAX_TINS];
+ __u32 peak_delay_us [TC_CAKE_MAX_TINS]; /* ~= delay to bulk flows */
+ __u32 peak_delay_us [TC_CAKE_MAX_TINS]; /* ~= bulk flow delay */
+ __u32 avge_delay_us [TC_CAKE_MAX_TINS];
+ __u32 base_delay_us [TC_CAKE_MAX_TINS]; /* ~= delay to sparse flows */
+ __u32 base_delay_us [TC_CAKE_MAX_TINS]; /* ~= sparse flows delay */
+ __u16 sparse_flows [TC_CAKE_MAX_TINS];
+ __u16 bulk_flows [TC_CAKE_MAX_TINS];
+ __u16 unresponse_flows [TC_CAKE_MAX_TINS]; /* v4 - was u32 last_len */
@ -60,6 +60,7 @@ index 8d2530d..c55a9a8 100644
+ __u32 capacity_estimate; /* version 2 */
+ __u32 memory_limit; /* version 3 */
+ __u32 memory_used; /* version 3 */
+ struct tc_cake_traffic_stats ack_drops [TC_CAKE_MAX_TINS]; /* v5 */
+};
+
#endif
@ -80,7 +81,7 @@ new file mode 100644
index 0000000..acbe56c
--- /dev/null
+++ b/tc/q_cake.c
@@ -0,0 +1,692 @@
@@ -0,0 +1,771 @@
+/*
+ * Common Applications Kept Enhanced -- CAKE
+ *
@ -144,9 +145,10 @@ index 0000000..acbe56c
+" dual-srchost | dual-dsthost | triple-isolate* ]\n"
+" [ nat | nonat* ]\n"
+" [ wash | nowash * ]\n"
+" [ ack-filter | ack-filter-aggressive | no-ack-filter * ]\n"
+" [ memlimit LIMIT ]\n"
+" [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n"
+" [ mpu N ]\n"
+" [ mpu N ] [ ingress | egress* ]\n"
+" (* marks defaults)\n");
+}
+
@ -162,12 +164,14 @@ index 0000000..acbe56c
+ int overhead = 0;
+ bool overhead_set = false;
+ bool overhead_override = false;
+ int wash = -1;
+ int mpu = 0;
+ int flowmode = -1;
+ int nat = -1;
+ int atm = -1;
+ int autorate = -1;
+ int wash = -1;
+ int ingress = -1;
+ int ack_filter = -1;
+ struct rtattr *tail;
+
+ while (argc > 0) {
@ -321,12 +325,22 @@ index 0000000..acbe56c
+ /* Typical VDSL2 framing schemes, both over PTM */
+ /* PTM has 64b/65b coding which absorbs some bandwidth */
+ } else if (strcmp(*argv, "pppoe-ptm") == 0) {
+ /* 2B PPP + 6B PPPoE + 6B dest MAC + 6B src MAC
+ * + 2B ethertype + 4B Frame Check Sequence
+ * + 1B Start of Frame (S) + 1B End of Frame (Ck)
+ * + 2B TC-CRC (PTM-FCS) = 30B
+ */
+ atm = 2;
+ overhead += 27;
+ overhead += 30;
+ overhead_set = true;
+ } else if (strcmp(*argv, "bridged-ptm") == 0) {
+ /* 6B dest MAC + 6B src MAC + 2B ethertype
+ * + 4B Frame Check Sequence
+ * + 1B Start of Frame (S) + 1B End of Frame (Ck)
+ * + 2B TC-CRC (PTM-FCS) = 22B
+ */
+ atm = 2;
+ overhead += 19;
+ overhead += 22;
+ overhead_set = true;
+
+ } else if (strcmp(*argv, "via-ethernet") == 0) {
@ -339,9 +353,27 @@ index 0000000..acbe56c
+ * that automatically, and is thus ignored.
+ *
+ * It would be deleted entirely, but it appears in the
+ * stats output when the automatic compensation is active.
+ * stats output when the automatic compensation is
+ * active.
+ */
+
+ } else if (strcmp(*argv, "total_overhead") == 0) {
+ /*
+ * This is the overhead cake accounts for; added here so
+ * that cake's "tc -s qdisc" output can be directly
+ * pasted into the tc command to instantiate a new cake..
+ */
+ NEXT_ARG();
+
+ } else if (strcmp(*argv, "hard_header_len") == 0) {
+ /*
+ * This is the overhead the kernel automatically
+ * accounted for; added here so that cake's "tc -s
+ * qdisc" output can be directly pasted into the tc
+ * command to instantiate a new cake..
+ */
+ NEXT_ARG();
+
+ } else if (strcmp(*argv, "ethernet") == 0) {
+ /* ethernet pre-amble & interframe gap & FCS
+ * you may need to add vlan tag */
@ -357,7 +389,7 @@ index 0000000..acbe56c
+
+ /*
+ * DOCSIS cable shapers account for Ethernet frame with FCS,
+ * but not interframe gap nor preamble.
+ * but not interframe gap or preamble.
+ */
+ } else if (strcmp(*argv, "docsis") == 0) {
+ atm = 0;
@ -384,6 +416,18 @@ index 0000000..acbe56c
+ return -1;
+ }
+
+ } else if (strcmp(*argv, "ingress") == 0) {
+ ingress = 1;
+ } else if (strcmp(*argv, "egress") == 0) {
+ ingress = 0;
+
+ } else if (strcmp(*argv, "no-ack-filter") == 0) {
+ ack_filter = 0;
+ } else if (strcmp(*argv, "ack-filter") == 0) {
+ ack_filter = 0x0200;
+ } else if (strcmp(*argv, "ack-filter-aggressive") == 0) {
+ ack_filter = 0x0600;
+
+ } else if (strcmp(*argv, "memlimit") == 0) {
+ NEXT_ARG();
+ if(get_size(&memlimit, *argv)) {
@ -432,6 +476,10 @@ index 0000000..acbe56c
+ addattr_l(n, 1024, TCA_CAKE_NAT, &nat, sizeof(nat));
+ if (wash != -1)
+ addattr_l(n, 1024, TCA_CAKE_WASH, &wash, sizeof(wash));
+ if (ingress != -1)
+ addattr_l(n, 1024, TCA_CAKE_INGRESS, &ingress, sizeof(ingress));
+ if (ack_filter != -1)
+ addattr_l(n, 1024, TCA_CAKE_ACK_FILTER, &ack_filter, sizeof(ack_filter));
+
+ tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
+ return 0;
@ -453,6 +501,8 @@ index 0000000..acbe56c
+ int nat = 0;
+ int autorate = 0;
+ int wash = 0;
+ int ingress = 0;
+ int ack_filter = 0;
+ SPRINT_BUF(b1);
+ SPRINT_BUF(b2);
+
@ -558,6 +608,14 @@ index 0000000..acbe56c
+ RTA_PAYLOAD(tb[TCA_CAKE_MPU]) >= sizeof(__u32)) {
+ mpu = rta_getattr_u32(tb[TCA_CAKE_MPU]);
+ }
+ if (tb[TCA_CAKE_INGRESS] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_INGRESS]) >= sizeof(__u32)) {
+ ingress = rta_getattr_u32(tb[TCA_CAKE_INGRESS]);
+ }
+ if (tb[TCA_CAKE_ACK_FILTER] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_ACK_FILTER]) >= sizeof(__u32)) {
+ ack_filter = rta_getattr_u32(tb[TCA_CAKE_ACK_FILTER]);
+ }
+ if (tb[TCA_CAKE_ETHERNET] &&
+ RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) {
+ ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]);
@ -570,6 +628,14 @@ index 0000000..acbe56c
+ if (wash)
+ fprintf(f,"wash ");
+
+ if (ingress)
+ fprintf(f,"ingress ");
+
+ if (ack_filter == 0x0600)
+ fprintf(f,"ack-filter-aggressive ");
+ else if (ack_filter)
+ fprintf(f,"ack-filter ");
+
+ if (interval)
+ fprintf(f, "rtt %s ", sprint_time(interval, b2));
+
@ -585,12 +651,19 @@ index 0000000..acbe56c
+
+ fprintf(f, "overhead %d ", overhead);
+
+ // This is actually the *amount* of automatic compensation, but we only report
+ // its presence as a boolean for now.
+ /* This is actually the *amount* of automatic compensation, but
+ * we only report its presence as a boolean for now.
+ */
+ if (ethernet)
+ fprintf(f, "via-ethernet ");
+ }
+
+ /* unconditionally report the overhead and hard_header_len overhead the
+ * kernel added automatically
+ */
+ fprintf(f, "total_overhead %d ", overhead);
+ fprintf(f, "hard_header_len %d ", ethernet);
+
+ if (mpu) {
+ fprintf(f, "mpu %d ", mpu);
+ }
@ -677,89 +750,96 @@ index 0000000..acbe56c
+
+ fprintf(f, " thresh ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_rate(stnc->threshold_rate[i], b1));
+ fprintf(f, " %12s", sprint_rate(stnc->threshold_rate[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " target ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->target_us[i], b1));
+ fprintf(f, " %12s", sprint_time(stnc->target_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " interval");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->interval_us[i], b1));
+ fprintf(f, " %12s", sprint_time(stnc->interval_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " pk_delay");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->peak_delay_us[i], b1));
+ fprintf(f, " %12s", sprint_time(stnc->peak_delay_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " av_delay");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->avge_delay_us[i], b1));
+ fprintf(f, " %12s", sprint_time(stnc->avge_delay_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " sp_delay");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12s", sprint_time(stnc->base_delay_us[i], b1));
+ fprintf(f, " %12s", sprint_time(stnc->base_delay_us[i], b1));
+ fprintf(f, "\n");
+
+ fprintf(f, " pkts ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->sent[i].packets);
+ fprintf(f, " %12u", stnc->sent[i].packets);
+ fprintf(f, "\n");
+
+ fprintf(f, " bytes ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12llu", stnc->sent[i].bytes);
+ fprintf(f, " %12llu", stnc->sent[i].bytes);
+ fprintf(f, "\n");
+
+ fprintf(f, " way_inds");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->way_indirect_hits[i]);
+ fprintf(f, " %12u", stnc->way_indirect_hits[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " way_miss");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->way_misses[i]);
+ fprintf(f, " %12u", stnc->way_misses[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " way_cols");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->way_collisions[i]);
+ fprintf(f, " %12u", stnc->way_collisions[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " drops ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->dropped[i].packets);
+ fprintf(f, " %12u", stnc->dropped[i].packets);
+ fprintf(f, "\n");
+
+ fprintf(f, " marks ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->ecn_marked[i].packets);
+ fprintf(f, " %12u", stnc->ecn_marked[i].packets);
+ fprintf(f, "\n");
+
+ if(stnc->version >= 5) {
+ fprintf(f, " ack_drop");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, " %12u", stnc->ack_drops[i].packets);
+ fprintf(f, "\n");
+ }
+
+ fprintf(f, " sp_flows");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->sparse_flows[i]);
+ fprintf(f, " %12u", stnc->sparse_flows[i]);
+ fprintf(f, "\n");
+
+ fprintf(f, " bk_flows");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->bulk_flows[i]);
+ fprintf(f, " %12u", stnc->bulk_flows[i]);
+ fprintf(f, "\n");
+
+ if(stnc->version >= 4) {
+ fprintf(f, " un_flows");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->unresponse_flows[i]);
+ fprintf(f, " %12u", stnc->unresponse_flows[i]);
+ fprintf(f, "\n");
+ }
+
+ fprintf(f, " max_len ");
+ for(i=0; i < stnc->tin_cnt; i++)
+ fprintf(f, "%12u", stnc->max_skblen[i]);
+ fprintf(f, " %12u", stnc->max_skblen[i]);
+ fprintf(f, "\n");
+ } else {
+ return -1;