2023-03-21 06:51:03 +09:00
|
|
|
From ce45f1c4b32cf69b166f56ef5bc6c761e06ed4e5 Mon Sep 17 00:00:00 2001
|
|
|
|
From: Yu Zhao <yuzhao@google.com>
|
|
|
|
Date: Wed, 21 Dec 2022 21:19:01 -0700
|
|
|
|
Subject: [PATCH 23/29] mm: multi-gen LRU: remove eviction fairness safeguard
|
|
|
|
|
|
|
|
Recall that the eviction consumes the oldest generation: first it
|
|
|
|
bucket-sorts pages whose gen counters were updated by the aging and
|
|
|
|
reclaims the rest; then it increments lrugen->min_seq.
|
|
|
|
|
|
|
|
The current eviction fairness safeguard for global reclaim has a
|
|
|
|
dilemma: when there are multiple eligible memcgs, should it continue
|
|
|
|
or stop upon meeting the reclaim goal? If it continues, it overshoots
|
|
|
|
and increases direct reclaim latency; if it stops, it loses fairness
|
|
|
|
between memcgs it has taken memory away from and those it has yet to.
|
|
|
|
|
|
|
|
With memcg LRU, the eviction, while ensuring eventual fairness, will
|
|
|
|
stop upon meeting its goal. Therefore the current eviction fairness
|
|
|
|
safeguard for global reclaim will not be needed.
|
|
|
|
|
|
|
|
Note that memcg LRU only applies to global reclaim. For memcg reclaim,
|
|
|
|
the eviction will continue, even if it is overshooting. This becomes
|
|
|
|
unconditional due to code simplification.
|
|
|
|
|
|
|
|
Link: https://lkml.kernel.org/r/20221222041905.2431096-4-yuzhao@google.com
|
|
|
|
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
|
|
|
Cc: Johannes Weiner <hannes@cmpxchg.org>
|
|
|
|
Cc: Jonathan Corbet <corbet@lwn.net>
|
|
|
|
Cc: Michael Larabel <Michael@MichaelLarabel.com>
|
|
|
|
Cc: Michal Hocko <mhocko@kernel.org>
|
|
|
|
Cc: Mike Rapoport <rppt@kernel.org>
|
|
|
|
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
|
|
|
Cc: Suren Baghdasaryan <surenb@google.com>
|
|
|
|
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
|
|
---
|
|
|
|
mm/vmscan.c | 82 +++++++++++++++--------------------------------------
|
|
|
|
1 file changed, 23 insertions(+), 59 deletions(-)
|
|
|
|
|
|
|
|
--- a/mm/vmscan.c
|
|
|
|
+++ b/mm/vmscan.c
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -443,6 +443,11 @@ static bool cgroup_reclaim(struct scan_c
|
2023-03-21 06:51:03 +09:00
|
|
|
return sc->target_mem_cgroup;
|
|
|
|
}
|
|
|
|
|
|
|
|
+static bool global_reclaim(struct scan_control *sc)
|
|
|
|
+{
|
|
|
|
+ return !sc->target_mem_cgroup || mem_cgroup_is_root(sc->target_mem_cgroup);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
/**
|
|
|
|
* writeback_throttling_sane - is the usual dirty throttling mechanism available?
|
|
|
|
* @sc: scan_control in question
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -493,6 +498,11 @@ static bool cgroup_reclaim(struct scan_c
|
2023-03-21 06:51:03 +09:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
+static bool global_reclaim(struct scan_control *sc)
|
|
|
|
+{
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
static bool writeback_throttling_sane(struct scan_control *sc)
|
|
|
|
{
|
|
|
|
return true;
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4722,8 +4732,7 @@ static int isolate_pages(struct lruvec *
|
2023-03-21 06:51:03 +09:00
|
|
|
return scanned;
|
|
|
|
}
|
|
|
|
|
|
|
|
-static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
|
|
|
|
- bool *need_swapping)
|
|
|
|
+static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
|
|
|
|
{
|
|
|
|
int type;
|
|
|
|
int scanned;
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4812,9 +4821,6 @@ retry:
|
2023-03-21 06:51:03 +09:00
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
|
|
|
|
- if (need_swapping && type == LRU_GEN_ANON)
|
|
|
|
- *need_swapping = true;
|
|
|
|
-
|
|
|
|
return scanned;
|
|
|
|
}
|
|
|
|
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4853,68 +4859,26 @@ done:
|
2023-03-21 06:51:03 +09:00
|
|
|
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
-static bool should_abort_scan(struct lruvec *lruvec, unsigned long seq,
|
|
|
|
- struct scan_control *sc, bool need_swapping)
|
|
|
|
+static unsigned long get_nr_to_reclaim(struct scan_control *sc)
|
|
|
|
{
|
|
|
|
- int i;
|
|
|
|
- DEFINE_MAX_SEQ(lruvec);
|
|
|
|
-
|
|
|
|
- if (!current_is_kswapd()) {
|
|
|
|
- /* age each memcg once to ensure fairness */
|
|
|
|
- if (max_seq - seq > 1)
|
|
|
|
- return true;
|
|
|
|
-
|
|
|
|
- /* over-swapping can increase allocation latency */
|
|
|
|
- if (sc->nr_reclaimed >= sc->nr_to_reclaim && need_swapping)
|
|
|
|
- return true;
|
|
|
|
-
|
|
|
|
- /* give this thread a chance to exit and free its memory */
|
|
|
|
- if (fatal_signal_pending(current)) {
|
|
|
|
- sc->nr_reclaimed += MIN_LRU_BATCH;
|
|
|
|
- return true;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (cgroup_reclaim(sc))
|
|
|
|
- return false;
|
|
|
|
- } else if (sc->nr_reclaimed - sc->last_reclaimed < sc->nr_to_reclaim)
|
|
|
|
- return false;
|
|
|
|
-
|
|
|
|
- /* keep scanning at low priorities to ensure fairness */
|
|
|
|
- if (sc->priority > DEF_PRIORITY - 2)
|
|
|
|
- return false;
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * A minimum amount of work was done under global memory pressure. For
|
|
|
|
- * kswapd, it may be overshooting. For direct reclaim, the target isn't
|
|
|
|
- * met, and yet the allocation may still succeed, since kswapd may have
|
|
|
|
- * caught up. In either case, it's better to stop now, and restart if
|
|
|
|
- * necessary.
|
|
|
|
- */
|
|
|
|
- for (i = 0; i <= sc->reclaim_idx; i++) {
|
|
|
|
- unsigned long wmark;
|
|
|
|
- struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i;
|
|
|
|
-
|
|
|
|
- if (!managed_zone(zone))
|
|
|
|
- continue;
|
|
|
|
-
|
|
|
|
- wmark = current_is_kswapd() ? high_wmark_pages(zone) : low_wmark_pages(zone);
|
|
|
|
- if (wmark > zone_page_state(zone, NR_FREE_PAGES))
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
+ /* don't abort memcg reclaim to ensure fairness */
|
|
|
|
+ if (!global_reclaim(sc))
|
|
|
|
+ return -1;
|
|
|
|
|
|
|
|
- sc->nr_reclaimed += MIN_LRU_BATCH;
|
|
|
|
+ /* discount the previous progress for kswapd */
|
|
|
|
+ if (current_is_kswapd())
|
|
|
|
+ return sc->nr_to_reclaim + sc->last_reclaimed;
|
|
|
|
|
|
|
|
- return true;
|
|
|
|
+ return max(sc->nr_to_reclaim, compact_gap(sc->order));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
|
|
|
{
|
|
|
|
struct blk_plug plug;
|
|
|
|
bool need_aging = false;
|
|
|
|
- bool need_swapping = false;
|
|
|
|
unsigned long scanned = 0;
|
|
|
|
unsigned long reclaimed = sc->nr_reclaimed;
|
|
|
|
- DEFINE_MAX_SEQ(lruvec);
|
|
|
|
+ unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
|
|
|
|
|
|
|
|
lru_add_drain();
|
|
|
|
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4938,7 +4902,7 @@ static void lru_gen_shrink_lruvec(struct
|
2023-03-21 06:51:03 +09:00
|
|
|
if (!nr_to_scan)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
- delta = evict_pages(lruvec, sc, swappiness, &need_swapping);
|
|
|
|
+ delta = evict_pages(lruvec, sc, swappiness);
|
|
|
|
if (!delta)
|
|
|
|
goto done;
|
|
|
|
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4946,7 +4910,7 @@ static void lru_gen_shrink_lruvec(struct
|
2023-03-21 06:51:03 +09:00
|
|
|
if (scanned >= nr_to_scan)
|
|
|
|
break;
|
|
|
|
|
|
|
|
- if (should_abort_scan(lruvec, max_seq, sc, need_swapping))
|
|
|
|
+ if (sc->nr_reclaimed >= nr_to_reclaim)
|
|
|
|
break;
|
|
|
|
|
|
|
|
cond_resched();
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -5393,7 +5357,7 @@ static int run_eviction(struct lruvec *l
|
2023-03-21 06:51:03 +09:00
|
|
|
if (sc->nr_reclaimed >= nr_to_reclaim)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
- if (!evict_pages(lruvec, sc, swappiness, NULL))
|
|
|
|
+ if (!evict_pages(lruvec, sc, swappiness))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
cond_resched();
|