2023-03-21 06:51:03 +09:00
|
|
|
From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001
|
|
|
|
From: Yu Zhao <yuzhao@google.com>
|
|
|
|
Date: Wed, 21 Dec 2022 21:19:05 -0700
|
|
|
|
Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags
|
|
|
|
|
|
|
|
Among the flags in scan_control:
|
|
|
|
1. sc->may_swap, which indicates swap constraint due to memsw.max, is
|
|
|
|
supported as usual.
|
|
|
|
2. sc->proactive, which indicates reclaim by memory.reclaim, may not
|
|
|
|
opportunistically skip the aging path, since it is considered less
|
|
|
|
latency sensitive.
|
|
|
|
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
|
|
|
|
swappiness to prioritize file LRU, since clean file pages are more
|
|
|
|
likely to exist.
|
|
|
|
4. sc->may_writepage and sc->may_unmap, which indicate opportunistic
|
|
|
|
reclaim, are rejected, since unmapped clean pages are already
|
|
|
|
prioritized. Scanning for more of them is likely futile and can
|
|
|
|
cause high reclaim latency when there is a large number of memcgs.
|
|
|
|
|
|
|
|
The rest are handled by the existing code.
|
|
|
|
|
|
|
|
Link: https://lkml.kernel.org/r/20221222041905.2431096-8-yuzhao@google.com
|
|
|
|
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
|
|
|
Cc: Johannes Weiner <hannes@cmpxchg.org>
|
|
|
|
Cc: Jonathan Corbet <corbet@lwn.net>
|
|
|
|
Cc: Michael Larabel <Michael@MichaelLarabel.com>
|
|
|
|
Cc: Michal Hocko <mhocko@kernel.org>
|
|
|
|
Cc: Mike Rapoport <rppt@kernel.org>
|
|
|
|
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
|
|
|
Cc: Suren Baghdasaryan <surenb@google.com>
|
|
|
|
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
|
|
---
|
|
|
|
mm/vmscan.c | 55 +++++++++++++++++++++++++++--------------------------
|
|
|
|
1 file changed, 28 insertions(+), 27 deletions(-)
|
|
|
|
|
|
|
|
--- a/mm/vmscan.c
|
|
|
|
+++ b/mm/vmscan.c
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -2905,6 +2905,9 @@ static int get_swappiness(struct lruvec
|
2023-03-21 06:51:03 +09:00
|
|
|
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
|
|
|
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
|
|
|
|
|
|
|
+ if (!sc->may_swap)
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
if (!can_demote(pgdat->node_id, sc) &&
|
|
|
|
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
|
|
|
|
return 0;
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -3952,7 +3955,7 @@ static void walk_mm(struct lruvec *lruve
|
2023-03-21 06:51:03 +09:00
|
|
|
} while (err == -EAGAIN);
|
|
|
|
}
|
|
|
|
|
|
|
|
-static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
|
|
|
|
+static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat, bool force_alloc)
|
|
|
|
{
|
|
|
|
struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
|
|
|
|
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -3960,7 +3963,7 @@ static struct lru_gen_mm_walk *set_mm_wa
|
2023-03-21 06:51:03 +09:00
|
|
|
VM_WARN_ON_ONCE(walk);
|
|
|
|
|
|
|
|
walk = &pgdat->mm_walk;
|
|
|
|
- } else if (!pgdat && !walk) {
|
|
|
|
+ } else if (!walk && force_alloc) {
|
|
|
|
VM_WARN_ON_ONCE(current_is_kswapd());
|
|
|
|
|
|
|
|
walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4146,7 +4149,7 @@ static bool try_to_inc_max_seq(struct lr
|
2023-03-21 06:51:03 +09:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
- walk = set_mm_walk(NULL);
|
|
|
|
+ walk = set_mm_walk(NULL, true);
|
|
|
|
if (!walk) {
|
|
|
|
success = iterate_mm_list_nowalk(lruvec, max_seq);
|
|
|
|
goto done;
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4215,8 +4218,6 @@ static bool lruvec_is_reclaimable(struct
|
2023-03-21 06:51:03 +09:00
|
|
|
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
|
|
|
DEFINE_MIN_SEQ(lruvec);
|
|
|
|
|
|
|
|
- VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
|
|
|
|
-
|
|
|
|
/* see the comment on lru_gen_page */
|
|
|
|
gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
|
|
|
|
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4472,12 +4473,8 @@ static bool isolate_page(struct lruvec *
|
2023-03-21 06:51:03 +09:00
|
|
|
{
|
|
|
|
bool success;
|
|
|
|
|
|
|
|
- /* unmapping inhibited */
|
|
|
|
- if (!sc->may_unmap && page_mapped(page))
|
|
|
|
- return false;
|
|
|
|
-
|
|
|
|
/* swapping inhibited */
|
|
|
|
- if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
|
|
|
|
+ if (!(sc->gfp_mask & __GFP_IO) &&
|
|
|
|
(PageDirty(page) ||
|
|
|
|
(PageAnon(page) && !PageSwapCache(page))))
|
|
|
|
return false;
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4574,9 +4571,8 @@ static int scan_pages(struct lruvec *lru
|
2023-03-21 06:51:03 +09:00
|
|
|
__count_vm_events(PGSCAN_ANON + type, isolated);
|
|
|
|
|
|
|
|
/*
|
|
|
|
- * There might not be eligible pages due to reclaim_idx, may_unmap and
|
|
|
|
- * may_writepage. Check the remaining to prevent livelock if it's not
|
|
|
|
- * making progress.
|
|
|
|
+ * There might not be eligible pages due to reclaim_idx. Check the
|
|
|
|
+ * remaining to prevent livelock if it's not making progress.
|
|
|
|
*/
|
|
|
|
return isolated || !remaining ? scanned : 0;
|
|
|
|
}
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4836,8 +4832,7 @@ static long get_nr_to_scan(struct lruvec
|
2023-03-21 06:51:03 +09:00
|
|
|
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
|
|
|
DEFINE_MAX_SEQ(lruvec);
|
|
|
|
|
|
|
|
- if (mem_cgroup_below_min(memcg) ||
|
|
|
|
- (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
|
|
|
|
+ if (mem_cgroup_below_min(memcg))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -4865,17 +4860,14 @@ static bool try_to_shrink_lruvec(struct
|
2023-03-21 06:51:03 +09:00
|
|
|
long nr_to_scan;
|
|
|
|
unsigned long scanned = 0;
|
|
|
|
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
|
|
|
|
+ int swappiness = get_swappiness(lruvec, sc);
|
|
|
|
+
|
|
|
|
+ /* clean file pages are more likely to exist */
|
|
|
|
+ if (swappiness && !(sc->gfp_mask & __GFP_IO))
|
|
|
|
+ swappiness = 1;
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
int delta;
|
|
|
|
- int swappiness;
|
|
|
|
-
|
|
|
|
- if (sc->may_swap)
|
|
|
|
- swappiness = get_swappiness(lruvec, sc);
|
|
|
|
- else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc))
|
|
|
|
- swappiness = 1;
|
|
|
|
- else
|
|
|
|
- swappiness = 0;
|
|
|
|
|
|
|
|
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
|
|
|
|
if (nr_to_scan <= 0)
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -5005,12 +4997,13 @@ static void lru_gen_shrink_lruvec(struct
|
2023-03-21 06:51:03 +09:00
|
|
|
struct blk_plug plug;
|
|
|
|
|
|
|
|
VM_WARN_ON_ONCE(global_reclaim(sc));
|
|
|
|
+ VM_WARN_ON_ONCE(!sc->may_writepage || !sc->may_unmap);
|
|
|
|
|
|
|
|
lru_add_drain();
|
|
|
|
|
|
|
|
blk_start_plug(&plug);
|
|
|
|
|
|
|
|
- set_mm_walk(lruvec_pgdat(lruvec));
|
|
|
|
+ set_mm_walk(NULL, false);
|
|
|
|
|
|
|
|
if (try_to_shrink_lruvec(lruvec, sc))
|
|
|
|
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -5066,11 +5059,19 @@ static void lru_gen_shrink_node(struct p
|
2023-03-21 06:51:03 +09:00
|
|
|
|
|
|
|
VM_WARN_ON_ONCE(!global_reclaim(sc));
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Unmapped clean pages are already prioritized. Scanning for more of
|
|
|
|
+ * them is likely futile and can cause high reclaim latency when there
|
|
|
|
+ * is a large number of memcgs.
|
|
|
|
+ */
|
|
|
|
+ if (!sc->may_writepage || !sc->may_unmap)
|
|
|
|
+ goto done;
|
|
|
|
+
|
|
|
|
lru_add_drain();
|
|
|
|
|
|
|
|
blk_start_plug(&plug);
|
|
|
|
|
|
|
|
- set_mm_walk(pgdat);
|
|
|
|
+ set_mm_walk(pgdat, false);
|
|
|
|
|
|
|
|
set_initial_priority(pgdat, sc);
|
|
|
|
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -5088,7 +5089,7 @@ static void lru_gen_shrink_node(struct p
|
2023-03-21 06:51:03 +09:00
|
|
|
clear_mm_walk();
|
|
|
|
|
|
|
|
blk_finish_plug(&plug);
|
|
|
|
-
|
|
|
|
+done:
|
|
|
|
/* kswapd should never fail */
|
|
|
|
pgdat->kswapd_failures = 0;
|
|
|
|
}
|
2023-03-25 17:24:27 +01:00
|
|
|
@@ -5656,7 +5657,7 @@ static ssize_t lru_gen_seq_write(struct
|
2023-03-21 06:51:03 +09:00
|
|
|
set_task_reclaim_state(current, &sc.reclaim_state);
|
|
|
|
flags = memalloc_noreclaim_save();
|
|
|
|
blk_start_plug(&plug);
|
|
|
|
- if (!set_mm_walk(NULL)) {
|
|
|
|
+ if (!set_mm_walk(NULL, true)) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto done;
|
|
|
|
}
|