mirror of
https://github.com/openwrt/openwrt.git
synced 2024-12-23 23:42:43 +00:00
kernel: 6.1: Synchronize MGLRU patches with upstream
Replace the refreshed 5.15 backports with backports for 6.1. This fixes FMODE_CAN_ODIRECT having the same value as FMODE_NOREUSE. Signed-off-by: Kazuki Hashimoto <kazukih0205@gmail.com>
This commit is contained in:
parent
0063e71d66
commit
b28b8ed1f4
@ -1,7 +1,7 @@
|
||||
From 348fdbada9fb3f0bf1a53651be46319105af187f Mon Sep 17 00:00:00 2001
|
||||
From 8c20e2eb5f2a0175b774134685e4d7bd93e85ff8 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:18:59 -0700
|
||||
Subject: [PATCH 21/29] mm: multi-gen LRU: rename lru_gen_struct to
|
||||
Subject: [PATCH 01/19] UPSTREAM: mm: multi-gen LRU: rename lru_gen_struct to
|
||||
lru_gen_folio
|
||||
|
||||
Patch series "mm: multi-gen LRU: memcg LRU", v3.
|
||||
@ -115,6 +115,10 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit 391655fe08d1f942359a11148aa9aaf3f99d6d6f)
|
||||
Change-Id: I7df67e0e2435ba28f10eaa57d28d98b61a9210a6
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
include/linux/mm_inline.h | 4 ++--
|
||||
include/linux/mmzone.h | 6 +++---
|
||||
@ -122,9 +126,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
mm/workingset.c | 4 ++--
|
||||
4 files changed, 24 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
|
||||
index e8ed225d8f7ca..f63968bd7de59 100644
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -178,7 +178,7 @@ static inline void lru_gen_update_size(s
|
||||
@@ -178,7 +178,7 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli
|
||||
int zone = folio_zonenum(folio);
|
||||
int delta = folio_nr_pages(folio);
|
||||
enum lru_list lru = type * LRU_INACTIVE_FILE;
|
||||
@ -133,7 +139,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS);
|
||||
VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS);
|
||||
@@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(str
|
||||
@@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
|
||||
int gen = folio_lru_gen(folio);
|
||||
int type = folio_is_file_lru(folio);
|
||||
int zone = folio_zonenum(folio);
|
||||
@ -142,6 +148,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
VM_WARN_ON_ONCE_FOLIO(gen != -1, folio);
|
||||
|
||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
||||
index 5f74891556f33..bd3e4689f72dc 100644
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -404,7 +404,7 @@ enum {
|
||||
@ -171,9 +179,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/* to concurrently iterate lru_gen_mm_list */
|
||||
struct lru_gen_mm_state mm_state;
|
||||
#endif
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index d18296109aa7e..27142caf284c1 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3190,7 +3190,7 @@ static int get_nr_gens(struct lruvec *lr
|
||||
@@ -3190,7 +3190,7 @@ static int get_nr_gens(struct lruvec *lruvec, int type)
|
||||
|
||||
static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
|
||||
{
|
||||
@ -191,7 +201,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
|
||||
|
||||
pos->refaulted = lrugen->avg_refaulted[type][tier] +
|
||||
@@ -3611,7 +3611,7 @@ static void read_ctrl_pos(struct lruvec
|
||||
@@ -3611,7 +3611,7 @@ static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
|
||||
static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover)
|
||||
{
|
||||
int hist, tier;
|
||||
@ -200,7 +210,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
|
||||
unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1;
|
||||
|
||||
@@ -3688,7 +3688,7 @@ static int folio_update_gen(struct folio
|
||||
@@ -3688,7 +3688,7 @@ static int folio_update_gen(struct folio *folio, int gen)
|
||||
static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
{
|
||||
int type = folio_is_file_lru(folio);
|
||||
@ -209,7 +219,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
unsigned long new_flags, old_flags = READ_ONCE(folio->flags);
|
||||
|
||||
@@ -3733,7 +3733,7 @@ static void update_batch_size(struct lru
|
||||
@@ -3733,7 +3733,7 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
|
||||
static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
|
||||
{
|
||||
int gen, type, zone;
|
||||
@ -218,7 +228,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
walk->batched = 0;
|
||||
|
||||
@@ -4253,7 +4253,7 @@ static bool inc_min_seq(struct lruvec *l
|
||||
@@ -4250,7 +4250,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
|
||||
{
|
||||
int zone;
|
||||
int remaining = MAX_LRU_BATCH;
|
||||
@ -227,7 +237,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
|
||||
if (type == LRU_GEN_ANON && !can_swap)
|
||||
@@ -4289,7 +4289,7 @@ static bool try_to_inc_min_seq(struct lr
|
||||
@@ -4286,7 +4286,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
|
||||
{
|
||||
int gen, type, zone;
|
||||
bool success = false;
|
||||
@ -236,7 +246,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
DEFINE_MIN_SEQ(lruvec);
|
||||
|
||||
VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
|
||||
@@ -4310,7 +4310,7 @@ next:
|
||||
@@ -4307,7 +4307,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
|
||||
;
|
||||
}
|
||||
|
||||
@ -245,7 +255,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
if (can_swap) {
|
||||
min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]);
|
||||
min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]);
|
||||
@@ -4332,7 +4332,7 @@ static void inc_max_seq(struct lruvec *l
|
||||
@@ -4329,7 +4329,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
|
||||
{
|
||||
int prev, next;
|
||||
int type, zone;
|
||||
@ -254,7 +264,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
spin_lock_irq(&lruvec->lru_lock);
|
||||
|
||||
@@ -4390,7 +4390,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
@@ -4387,7 +4387,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
bool success;
|
||||
struct lru_gen_mm_walk *walk;
|
||||
struct mm_struct *mm = NULL;
|
||||
@ -263,7 +273,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
|
||||
|
||||
@@ -4455,7 +4455,7 @@ static bool should_run_aging(struct lruv
|
||||
@@ -4452,7 +4452,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
|
||||
unsigned long old = 0;
|
||||
unsigned long young = 0;
|
||||
unsigned long total = 0;
|
||||
@ -272,7 +282,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
|
||||
for (type = !can_swap; type < ANON_AND_FILE; type++) {
|
||||
@@ -4740,7 +4740,7 @@ static bool sort_folio(struct lruvec *lr
|
||||
@@ -4737,7 +4737,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
|
||||
int delta = folio_nr_pages(folio);
|
||||
int refs = folio_lru_refs(folio);
|
||||
int tier = lru_tier_from_refs(refs);
|
||||
@ -281,7 +291,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio);
|
||||
|
||||
@@ -4840,7 +4840,7 @@ static int scan_folios(struct lruvec *lr
|
||||
@@ -4837,7 +4837,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
|
||||
int scanned = 0;
|
||||
int isolated = 0;
|
||||
int remaining = MAX_LRU_BATCH;
|
||||
@ -290,7 +300,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
|
||||
VM_WARN_ON_ONCE(!list_empty(list));
|
||||
@@ -5240,7 +5240,7 @@ done:
|
||||
@@ -5237,7 +5237,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
|
||||
static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
|
||||
{
|
||||
@ -299,7 +309,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
if (lrugen->enabled) {
|
||||
enum lru_list lru;
|
||||
@@ -5522,7 +5522,7 @@ static void lru_gen_seq_show_full(struct
|
||||
@@ -5519,7 +5519,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
|
||||
int i;
|
||||
int type, tier;
|
||||
int hist = lru_hist_from_seq(seq);
|
||||
@ -308,7 +318,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
|
||||
seq_printf(m, " %10d", tier);
|
||||
@@ -5572,7 +5572,7 @@ static int lru_gen_seq_show(struct seq_f
|
||||
@@ -5569,7 +5569,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v)
|
||||
unsigned long seq;
|
||||
bool full = !debugfs_real_fops(m->file)->write;
|
||||
struct lruvec *lruvec = v;
|
||||
@ -317,7 +327,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
int nid = lruvec_pgdat(lruvec)->node_id;
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
DEFINE_MAX_SEQ(lruvec);
|
||||
@@ -5826,7 +5826,7 @@ void lru_gen_init_lruvec(struct lruvec *
|
||||
@@ -5823,7 +5823,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
{
|
||||
int i;
|
||||
int gen, type, zone;
|
||||
@ -326,9 +336,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
lrugen->max_seq = MIN_NR_GENS + 1;
|
||||
lrugen->enabled = lru_gen_enabled();
|
||||
diff --git a/mm/workingset.c b/mm/workingset.c
|
||||
index ae7e984b23c6b..688aaa73f64e8 100644
|
||||
--- a/mm/workingset.c
|
||||
+++ b/mm/workingset.c
|
||||
@@ -223,7 +223,7 @@ static void *lru_gen_eviction(struct fol
|
||||
@@ -223,7 +223,7 @@ static void *lru_gen_eviction(struct folio *folio)
|
||||
unsigned long token;
|
||||
unsigned long min_seq;
|
||||
struct lruvec *lruvec;
|
||||
@ -337,7 +349,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
int type = folio_is_file_lru(folio);
|
||||
int delta = folio_nr_pages(folio);
|
||||
int refs = folio_lru_refs(folio);
|
||||
@@ -252,7 +252,7 @@ static void lru_gen_refault(struct folio
|
||||
@@ -252,7 +252,7 @@ static void lru_gen_refault(struct folio *folio, void *shadow)
|
||||
unsigned long token;
|
||||
unsigned long min_seq;
|
||||
struct lruvec *lruvec;
|
||||
@ -346,3 +358,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
struct mem_cgroup *memcg;
|
||||
struct pglist_data *pgdat;
|
||||
int type = folio_is_file_lru(folio);
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,7 @@
|
||||
From afd37e73db04c7e6b47411120ac5f6a7eca51fec Mon Sep 17 00:00:00 2001
|
||||
From 656287d55d9cfc72a4bcd4d9bd098570f12ce409 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:19:00 -0700
|
||||
Subject: [PATCH 22/29] mm: multi-gen LRU: rename lrugen->lists[] to
|
||||
Subject: [PATCH 02/19] UPSTREAM: mm: multi-gen LRU: rename lrugen->lists[] to
|
||||
lrugen->folios[]
|
||||
|
||||
lru_gen_folio will be chained into per-node lists by the coming
|
||||
@ -17,15 +17,54 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit 6df1b2212950aae2b2188c6645ea18e2a9e3fdd5)
|
||||
Change-Id: I09f53e0fb2cd6b8b3adbb8a80b15dc5efbeae857
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
include/linux/mm_inline.h | 4 ++--
|
||||
include/linux/mmzone.h | 8 ++++----
|
||||
mm/vmscan.c | 20 ++++++++++----------
|
||||
3 files changed, 16 insertions(+), 16 deletions(-)
|
||||
Documentation/mm/multigen_lru.rst | 8 ++++----
|
||||
include/linux/mm_inline.h | 4 ++--
|
||||
include/linux/mmzone.h | 8 ++++----
|
||||
mm/vmscan.c | 20 ++++++++++----------
|
||||
4 files changed, 20 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
|
||||
index d7062c6a89464..d8f721f98868a 100644
|
||||
--- a/Documentation/mm/multigen_lru.rst
|
||||
+++ b/Documentation/mm/multigen_lru.rst
|
||||
@@ -89,15 +89,15 @@ variables are monotonically increasing.
|
||||
|
||||
Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)``
|
||||
bits in order to fit into the gen counter in ``folio->flags``. Each
|
||||
-truncated generation number is an index to ``lrugen->lists[]``. The
|
||||
+truncated generation number is an index to ``lrugen->folios[]``. The
|
||||
sliding window technique is used to track at least ``MIN_NR_GENS`` and
|
||||
at most ``MAX_NR_GENS`` generations. The gen counter stores a value
|
||||
within ``[1, MAX_NR_GENS]`` while a page is on one of
|
||||
-``lrugen->lists[]``; otherwise it stores zero.
|
||||
+``lrugen->folios[]``; otherwise it stores zero.
|
||||
|
||||
Each generation is divided into multiple tiers. A page accessed ``N``
|
||||
times through file descriptors is in tier ``order_base_2(N)``. Unlike
|
||||
-generations, tiers do not have dedicated ``lrugen->lists[]``. In
|
||||
+generations, tiers do not have dedicated ``lrugen->folios[]``. In
|
||||
contrast to moving across generations, which requires the LRU lock,
|
||||
moving across tiers only involves atomic operations on
|
||||
``folio->flags`` and therefore has a negligible cost. A feedback loop
|
||||
@@ -127,7 +127,7 @@ page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``.
|
||||
Eviction
|
||||
--------
|
||||
The eviction consumes old generations. Given an ``lruvec``, it
|
||||
-increments ``min_seq`` when ``lrugen->lists[]`` indexed by
|
||||
+increments ``min_seq`` when ``lrugen->folios[]`` indexed by
|
||||
``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to
|
||||
evict from, it first compares ``min_seq[]`` to select the older type.
|
||||
If both types are equally old, it selects the one whose first tier has
|
||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
|
||||
index f63968bd7de59..da38e3d962e2f 100644
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(str
|
||||
@@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio,
|
||||
lru_gen_update_size(lruvec, folio, -1, gen);
|
||||
/* for folio_rotate_reclaimable() */
|
||||
if (reclaiming)
|
||||
@ -37,6 +76,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
return true;
|
||||
}
|
||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
||||
index bd3e4689f72dc..02e4323744715 100644
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -312,7 +312,7 @@ enum lruvec_flags {
|
||||
@ -68,9 +109,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/* the multi-gen LRU sizes, eventually consistent */
|
||||
long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
/* the exponential moving average of refaulted */
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 27142caf284c1..b02fed912f742 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4261,7 +4261,7 @@ static bool inc_min_seq(struct lruvec *l
|
||||
@@ -4258,7 +4258,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
|
||||
|
||||
/* prevent cold/hot inversion if force_scan is true */
|
||||
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
@ -79,7 +122,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
while (!list_empty(head)) {
|
||||
struct folio *folio = lru_to_folio(head);
|
||||
@@ -4272,7 +4272,7 @@ static bool inc_min_seq(struct lruvec *l
|
||||
@@ -4269,7 +4269,7 @@ static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap)
|
||||
VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio);
|
||||
|
||||
new_gen = folio_inc_gen(lruvec, folio, false);
|
||||
@ -88,7 +131,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
if (!--remaining)
|
||||
return false;
|
||||
@@ -4300,7 +4300,7 @@ static bool try_to_inc_min_seq(struct lr
|
||||
@@ -4297,7 +4297,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
|
||||
gen = lru_gen_from_seq(min_seq[type]);
|
||||
|
||||
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
|
||||
@ -97,7 +140,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
goto next;
|
||||
}
|
||||
|
||||
@@ -4765,7 +4765,7 @@ static bool sort_folio(struct lruvec *lr
|
||||
@@ -4762,7 +4762,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
|
||||
|
||||
/* promoted */
|
||||
if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
|
||||
@ -106,7 +149,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -4774,7 +4774,7 @@ static bool sort_folio(struct lruvec *lr
|
||||
@@ -4771,7 +4771,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
|
||||
int hist = lru_hist_from_seq(lrugen->min_seq[type]);
|
||||
|
||||
gen = folio_inc_gen(lruvec, folio, false);
|
||||
@ -115,7 +158,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
|
||||
lrugen->protected[hist][type][tier - 1] + delta);
|
||||
@@ -4786,7 +4786,7 @@ static bool sort_folio(struct lruvec *lr
|
||||
@@ -4783,7 +4783,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx)
|
||||
if (folio_test_locked(folio) || folio_test_writeback(folio) ||
|
||||
(type == LRU_GEN_FILE && folio_test_dirty(folio))) {
|
||||
gen = folio_inc_gen(lruvec, folio, true);
|
||||
@ -124,7 +167,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -4853,7 +4853,7 @@ static int scan_folios(struct lruvec *lr
|
||||
@@ -4850,7 +4850,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
|
||||
for (zone = sc->reclaim_idx; zone >= 0; zone--) {
|
||||
LIST_HEAD(moved);
|
||||
int skipped = 0;
|
||||
@ -133,7 +176,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
while (!list_empty(head)) {
|
||||
struct folio *folio = lru_to_folio(head);
|
||||
@@ -5253,7 +5253,7 @@ static bool __maybe_unused state_is_vali
|
||||
@@ -5250,7 +5250,7 @@ static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
|
||||
int gen, type, zone;
|
||||
|
||||
for_each_gen_type_zone(gen, type, zone) {
|
||||
@ -142,7 +185,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -5298,7 +5298,7 @@ static bool drain_evictable(struct lruve
|
||||
@@ -5295,7 +5295,7 @@ static bool drain_evictable(struct lruvec *lruvec)
|
||||
int remaining = MAX_LRU_BATCH;
|
||||
|
||||
for_each_gen_type_zone(gen, type, zone) {
|
||||
@ -151,7 +194,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
while (!list_empty(head)) {
|
||||
bool success;
|
||||
@@ -5835,7 +5835,7 @@ void lru_gen_init_lruvec(struct lruvec *
|
||||
@@ -5832,7 +5832,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
lrugen->timestamps[i] = jiffies;
|
||||
|
||||
for_each_gen_type_zone(gen, type, zone)
|
||||
@ -160,3 +203,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
lruvec->mm_state.seq = MIN_NR_GENS;
|
||||
init_waitqueue_head(&lruvec->mm_state.wait);
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,8 @@
|
||||
From ce45f1c4b32cf69b166f56ef5bc6c761e06ed4e5 Mon Sep 17 00:00:00 2001
|
||||
From 14f9a7a15f3d1af351f30e0438fd747b7ac253b0 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:19:01 -0700
|
||||
Subject: [PATCH 23/29] mm: multi-gen LRU: remove eviction fairness safeguard
|
||||
Subject: [PATCH 03/19] UPSTREAM: mm: multi-gen LRU: remove eviction fairness
|
||||
safeguard
|
||||
|
||||
Recall that the eviction consumes the oldest generation: first it
|
||||
bucket-sorts folios whose gen counters were updated by the aging and
|
||||
@ -31,13 +32,19 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit a579086c99ed70cc4bfc104348dbe3dd8f2787e6)
|
||||
Change-Id: I08ac1b3c90e29cafd0566785aaa4bcdb5db7d22c
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 82 +++++++++++++++--------------------------------------
|
||||
1 file changed, 23 insertions(+), 59 deletions(-)
|
||||
mm/vmscan.c | 81 +++++++++++++++--------------------------------------
|
||||
1 file changed, 23 insertions(+), 58 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index b02fed912f742..991961180b320 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -448,6 +448,11 @@ static bool cgroup_reclaim(struct scan_c
|
||||
@@ -448,6 +448,11 @@ static bool cgroup_reclaim(struct scan_control *sc)
|
||||
return sc->target_mem_cgroup;
|
||||
}
|
||||
|
||||
@ -49,7 +56,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/**
|
||||
* writeback_throttling_sane - is the usual dirty throttling mechanism available?
|
||||
* @sc: scan_control in question
|
||||
@@ -498,6 +503,11 @@ static bool cgroup_reclaim(struct scan_c
|
||||
@@ -498,6 +503,11 @@ static bool cgroup_reclaim(struct scan_control *sc)
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -61,7 +68,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static bool writeback_throttling_sane(struct scan_control *sc)
|
||||
{
|
||||
return true;
|
||||
@@ -4996,8 +5006,7 @@ static int isolate_folios(struct lruvec
|
||||
@@ -4993,8 +5003,7 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
|
||||
return scanned;
|
||||
}
|
||||
|
||||
@ -71,7 +78,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
{
|
||||
int type;
|
||||
int scanned;
|
||||
@@ -5086,9 +5095,6 @@ retry:
|
||||
@@ -5083,9 +5092,6 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
|
||||
goto retry;
|
||||
}
|
||||
|
||||
@ -81,7 +88,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return scanned;
|
||||
}
|
||||
|
||||
@@ -5127,67 +5133,26 @@ done:
|
||||
@@ -5124,67 +5130,26 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *
|
||||
return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
|
||||
}
|
||||
|
||||
@ -158,7 +165,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
lru_add_drain();
|
||||
|
||||
@@ -5211,7 +5176,7 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5208,7 +5173,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
if (!nr_to_scan)
|
||||
goto done;
|
||||
|
||||
@ -167,7 +174,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
if (!delta)
|
||||
goto done;
|
||||
|
||||
@@ -5219,7 +5184,7 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5216,7 +5181,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
if (scanned >= nr_to_scan)
|
||||
break;
|
||||
|
||||
@ -176,7 +183,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
break;
|
||||
|
||||
cond_resched();
|
||||
@@ -5669,7 +5634,7 @@ static int run_eviction(struct lruvec *l
|
||||
@@ -5666,7 +5631,7 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co
|
||||
if (sc->nr_reclaimed >= nr_to_reclaim)
|
||||
return 0;
|
||||
|
||||
@ -185,3 +192,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return 0;
|
||||
|
||||
cond_resched();
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,8 @@
|
||||
From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001
|
||||
From f3c93d2e37a3c56593d7ccf4f4bcf1b58426fdd8 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:19:02 -0700
|
||||
Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard
|
||||
Subject: [PATCH 04/19] BACKPORT: mm: multi-gen LRU: remove aging fairness
|
||||
safeguard
|
||||
|
||||
Recall that the aging produces the youngest generation: first it scans
|
||||
for accessed folios and updates their gen counters; then it increments
|
||||
@ -31,10 +32,18 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit 7348cc91821b0cb24dfb00e578047f68299a50ab)
|
||||
[TJ: Resolved conflicts with older function signatures for
|
||||
mem_cgroup_below_min / mem_cgroup_below_low]
|
||||
Change-Id: I6e36ecfbaaefbc0a56d9a9d5d7cbe404ed7f57a5
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
|
||||
1 file changed, 59 insertions(+), 67 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 991961180b320..5a2e83e673232 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -136,7 +136,6 @@ struct scan_control {
|
||||
@ -45,7 +54,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
unsigned long last_reclaimed;
|
||||
#endif
|
||||
|
||||
@@ -4458,7 +4457,7 @@ done:
|
||||
@@ -4455,7 +4454,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -54,7 +63,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
|
||||
{
|
||||
int gen, type, zone;
|
||||
@@ -4467,6 +4466,13 @@ static bool should_run_aging(struct lruv
|
||||
@@ -4464,6 +4463,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
|
||||
unsigned long total = 0;
|
||||
struct lru_gen_folio *lrugen = &lruvec->lrugen;
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
@ -68,7 +77,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
for (type = !can_swap; type < ANON_AND_FILE; type++) {
|
||||
unsigned long seq;
|
||||
@@ -4495,8 +4501,6 @@ static bool should_run_aging(struct lruv
|
||||
@@ -4492,8 +4498,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
|
||||
* stalls when the number of generations reaches MIN_NR_GENS. Hence, the
|
||||
* ideal number of generations is MIN_NR_GENS+1.
|
||||
*/
|
||||
@ -77,7 +86,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
|
||||
return false;
|
||||
|
||||
@@ -4515,40 +4519,54 @@ static bool should_run_aging(struct lruv
|
||||
@@ -4512,40 +4516,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -153,7 +162,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
/* to protect the working set of the last N jiffies */
|
||||
@@ -4557,46 +4575,32 @@ static unsigned long lru_gen_min_ttl __r
|
||||
@@ -4554,46 +4572,32 @@ static unsigned long lru_gen_min_ttl __read_mostly;
|
||||
static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
{
|
||||
struct mem_cgroup *memcg;
|
||||
@ -207,7 +216,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
*/
|
||||
if (mutex_trylock(&oom_lock)) {
|
||||
struct oom_control oc = {
|
||||
@@ -5104,33 +5108,27 @@ retry:
|
||||
@@ -5101,33 +5105,27 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
|
||||
* reclaim.
|
||||
*/
|
||||
static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
|
||||
@ -247,7 +256,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
|
||||
@@ -5149,9 +5147,7 @@ static unsigned long get_nr_to_reclaim(s
|
||||
@@ -5146,9 +5144,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
|
||||
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
struct blk_plug plug;
|
||||
@ -257,7 +266,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
|
||||
|
||||
lru_add_drain();
|
||||
@@ -5172,13 +5168,13 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5169,13 +5165,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
else
|
||||
swappiness = 0;
|
||||
|
||||
@ -274,7 +283,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
scanned += delta;
|
||||
if (scanned >= nr_to_scan)
|
||||
@@ -5190,10 +5186,6 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5187,10 +5183,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
@ -285,3 +294,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
clear_mm_walk();
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,12 +1,11 @@
|
||||
From 107d54931df3c28d81648122e219bf0034ef4e99 Mon Sep 17 00:00:00 2001
|
||||
From eca3858631e0cbad2ca6e40f788892749428e4cb Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:19:03 -0700
|
||||
Subject: [PATCH 25/29] mm: multi-gen LRU: shuffle should_run_aging()
|
||||
Subject: [PATCH 05/19] UPSTREAM: mm: multi-gen LRU: shuffle should_run_aging()
|
||||
|
||||
Move should_run_aging() next to its only caller left.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20221222041905.2431096-6-yuzhao@google.com
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Cc: Johannes Weiner <hannes@cmpxchg.org>
|
||||
Cc: Jonathan Corbet <corbet@lwn.net>
|
||||
Cc: Michael Larabel <Michael@MichaelLarabel.com>
|
||||
@ -15,13 +14,21 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit 77d4459a4a1a472b7309e475f962dda87d950abd)
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
Change-Id: I3b0383fe16b93a783b4d8c0b3a0b325160392576
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 124 ++++++++++++++++++++++++++--------------------------
|
||||
1 file changed, 62 insertions(+), 62 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 5a2e83e673232..0c47952714b26 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4457,68 +4457,6 @@ done:
|
||||
@@ -4454,68 +4454,6 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -90,7 +97,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
int gen, type, zone;
|
||||
@@ -5102,6 +5040,68 @@ retry:
|
||||
@@ -5099,6 +5037,68 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap
|
||||
return scanned;
|
||||
}
|
||||
|
||||
@ -159,3 +166,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/*
|
||||
* For future optimizations:
|
||||
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,8 @@
|
||||
From fa6363828d314e837c5f79e97ea5e8c0d2f7f062 Mon Sep 17 00:00:00 2001
|
||||
From 8ee8571e47aa75221e5fbd4c9c7802fc4244c346 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:19:04 -0700
|
||||
Subject: [PATCH 26/29] mm: multi-gen LRU: per-node lru_gen_folio lists
|
||||
Subject: [PATCH 06/19] BACKPORT: mm: multi-gen LRU: per-node lru_gen_folio
|
||||
lists
|
||||
|
||||
For each node, memcgs are divided into two generations: the old and
|
||||
the young. For each generation, memcgs are randomly sharded into
|
||||
@ -58,18 +59,26 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit e4dde56cd208674ce899b47589f263499e5b8cdc)
|
||||
[TJ: Resolved conflicts with older function signatures for
|
||||
mem_cgroup_below_min / mem_cgroup_below_low and includes]
|
||||
Change-Id: Idc8a0f635e035d72dd911f807d1224cb47cbd655
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
include/linux/memcontrol.h | 10 +
|
||||
include/linux/mm_inline.h | 17 ++
|
||||
include/linux/mmzone.h | 117 +++++++++++-
|
||||
mm/memcontrol.c | 16 ++
|
||||
mm/folio_alloc.c | 1 +
|
||||
mm/vmscan.c | 373 +++++++++++++++++++++++++++++++++----
|
||||
6 files changed, 499 insertions(+), 35 deletions(-)
|
||||
mm/page_alloc.c | 1 +
|
||||
mm/vmscan.c | 374 +++++++++++++++++++++++++++++++++----
|
||||
6 files changed, 500 insertions(+), 35 deletions(-)
|
||||
|
||||
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
|
||||
index e039763029563..82d28b052a9e5 100644
|
||||
--- a/include/linux/memcontrol.h
|
||||
+++ b/include/linux/memcontrol.h
|
||||
@@ -790,6 +790,11 @@ static inline void obj_cgroup_put(struct
|
||||
@@ -790,6 +790,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
|
||||
percpu_ref_put(&objcg->refcnt);
|
||||
}
|
||||
|
||||
@ -81,7 +90,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
|
||||
{
|
||||
if (memcg)
|
||||
@@ -1290,6 +1295,11 @@ static inline void obj_cgroup_put(struct
|
||||
@@ -1290,6 +1295,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
|
||||
{
|
||||
}
|
||||
|
||||
@ -93,9 +102,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
|
||||
index da38e3d962e2f..c1fd3922dc5dd 100644
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void
|
||||
@@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void)
|
||||
return current->in_lru_fault;
|
||||
}
|
||||
|
||||
@ -114,7 +125,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static inline int lru_gen_from_seq(unsigned long seq)
|
||||
{
|
||||
return seq % MAX_NR_GENS;
|
||||
@@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void
|
||||
@@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -126,6 +137,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
{
|
||||
return false;
|
||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
||||
index 02e4323744715..66e067a635682 100644
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -7,6 +7,7 @@
|
||||
@ -167,7 +180,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *
|
||||
@@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *lruvec);
|
||||
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
@ -256,7 +269,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
{
|
||||
}
|
||||
@@ -494,6 +587,7 @@ static inline void lru_gen_look_around(s
|
||||
@@ -494,6 +587,7 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
@ -264,7 +277,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
@@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(st
|
||||
@@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
@ -299,9 +312,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
#endif
|
||||
|
||||
CACHELINE_PADDING(_pad2_);
|
||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
|
||||
index 3e8f1ad0fe9db..7815d556e38cc 100644
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -477,6 +477,16 @@ static void mem_cgroup_update_tree(struc
|
||||
@@ -477,6 +477,16 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
|
||||
struct mem_cgroup_per_node *mz;
|
||||
struct mem_cgroup_tree_per_node *mctz;
|
||||
|
||||
@ -318,7 +333,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
mctz = soft_limit_tree.rb_tree_per_node[nid];
|
||||
if (!mctz)
|
||||
return;
|
||||
@@ -3522,6 +3532,9 @@ unsigned long mem_cgroup_soft_limit_recl
|
||||
@@ -3522,6 +3532,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
|
||||
struct mem_cgroup_tree_per_node *mctz;
|
||||
unsigned long excess;
|
||||
|
||||
@ -328,7 +343,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
if (order > 0)
|
||||
return 0;
|
||||
|
||||
@@ -5382,6 +5395,7 @@ static int mem_cgroup_css_online(struct
|
||||
@@ -5382,6 +5395,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
|
||||
if (unlikely(mem_cgroup_is_root(memcg)))
|
||||
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
|
||||
2UL*HZ);
|
||||
@ -336,7 +351,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return 0;
|
||||
offline_kmem:
|
||||
memcg_offline_kmem(memcg);
|
||||
@@ -5413,6 +5427,7 @@ static void mem_cgroup_css_offline(struc
|
||||
@@ -5413,6 +5427,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
|
||||
memcg_offline_kmem(memcg);
|
||||
reparent_shrinker_deferred(memcg);
|
||||
wb_memcg_offline(memcg);
|
||||
@ -344,7 +359,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
drain_all_stock(memcg);
|
||||
|
||||
@@ -5424,6 +5439,7 @@ static void mem_cgroup_css_released(stru
|
||||
@@ -5424,6 +5439,7 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
|
||||
|
||||
invalidate_reclaim_iterators(memcg);
|
||||
@ -352,9 +367,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
|
||||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
|
||||
index 69668817fed37..473057b81a9df 100644
|
||||
--- a/mm/page_alloc.c
|
||||
+++ b/mm/page_alloc.c
|
||||
@@ -7957,6 +7957,7 @@ static void __init free_area_init_node(i
|
||||
@@ -7957,6 +7957,7 @@ static void __init free_area_init_node(int nid)
|
||||
pgdat_set_deferred_range(pgdat);
|
||||
|
||||
free_area_init_core(pgdat);
|
||||
@ -362,6 +379,8 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
static void __init free_area_init_memoryless_node(int nid)
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 0c47952714b26..65eb28448f216 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -54,6 +54,8 @@
|
||||
@ -385,7 +404,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/* Allocation order */
|
||||
s8 order;
|
||||
|
||||
@@ -3160,6 +3157,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_ca
|
||||
@@ -3160,6 +3157,9 @@ DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
|
||||
for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
|
||||
for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
|
||||
|
||||
@ -395,7 +414,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
|
||||
{
|
||||
struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
@@ -4443,8 +4443,7 @@ done:
|
||||
@@ -4440,8 +4440,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
if (sc->priority <= DEF_PRIORITY - 2)
|
||||
wait_event_killable(lruvec->mm_state.wait,
|
||||
max_seq < READ_ONCE(lrugen->max_seq));
|
||||
@ -405,7 +424,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq));
|
||||
@@ -4517,8 +4516,6 @@ static void lru_gen_age_node(struct pgli
|
||||
@@ -4514,8 +4513,6 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
|
||||
VM_WARN_ON_ONCE(!current_is_kswapd());
|
||||
|
||||
@ -414,7 +433,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/* check the order to exclude compaction-induced reclaim */
|
||||
if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
|
||||
return;
|
||||
@@ -5107,8 +5104,7 @@ static bool should_run_aging(struct lruv
|
||||
@@ -5104,8 +5101,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
|
||||
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
|
||||
* reclaim.
|
||||
*/
|
||||
@ -424,7 +443,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
{
|
||||
unsigned long nr_to_scan;
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
@@ -5125,10 +5121,8 @@ static unsigned long get_nr_to_scan(stru
|
||||
@@ -5122,10 +5118,8 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *
|
||||
if (sc->priority == DEF_PRIORITY)
|
||||
return nr_to_scan;
|
||||
|
||||
@ -436,7 +455,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
static unsigned long get_nr_to_reclaim(struct scan_control *sc)
|
||||
@@ -5137,29 +5131,18 @@ static unsigned long get_nr_to_reclaim(s
|
||||
@@ -5134,29 +5128,18 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
|
||||
if (!global_reclaim(sc))
|
||||
return -1;
|
||||
|
||||
@ -468,7 +487,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
if (sc->may_swap)
|
||||
swappiness = get_swappiness(lruvec, sc);
|
||||
@@ -5169,7 +5152,7 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5166,7 +5149,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
swappiness = 0;
|
||||
|
||||
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
|
||||
@ -477,7 +496,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
break;
|
||||
|
||||
delta = evict_folios(lruvec, sc, swappiness);
|
||||
@@ -5186,10 +5169,250 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5183,11 +5166,252 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
@ -515,8 +534,9 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+
|
||||
+ shrink_slab(sc->gfp_mask, pgdat->node_id, memcg, sc->priority);
|
||||
+
|
||||
+ vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
|
||||
+ sc->nr_reclaimed - reclaimed);
|
||||
+ if (!sc->proactive)
|
||||
+ vmpressure(sc->gfp_mask, memcg, false, sc->nr_scanned - scanned,
|
||||
+ sc->nr_reclaimed - reclaimed);
|
||||
+
|
||||
+ sc->nr_reclaimed += current->reclaim_state->reclaimed_slab;
|
||||
+ current->reclaim_state->reclaimed_slab = 0;
|
||||
@ -538,7 +558,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+ struct mem_cgroup *memcg = NULL;
|
||||
+ unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
|
||||
+
|
||||
+ bin = first_bin = prandom_u32_max(MEMCG_NR_BINS);
|
||||
+ bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
+restart:
|
||||
+ gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
|
||||
+
|
||||
@ -601,11 +621,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+ if (try_to_shrink_lruvec(lruvec, sc))
|
||||
+ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
|
||||
+
|
||||
+ clear_mm_walk();
|
||||
+
|
||||
+ blk_finish_plug(&plug);
|
||||
+}
|
||||
+
|
||||
clear_mm_walk();
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
}
|
||||
|
||||
+#else /* !CONFIG_MEMCG */
|
||||
+
|
||||
+static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
@ -671,9 +691,9 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+ if (current_is_kswapd())
|
||||
+ sc->nr_reclaimed += reclaimed;
|
||||
+
|
||||
clear_mm_walk();
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
+ clear_mm_walk();
|
||||
+
|
||||
+ blk_finish_plug(&plug);
|
||||
+
|
||||
+ /* kswapd should never fail */
|
||||
+ pgdat->kswapd_failures = 0;
|
||||
@ -684,7 +704,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+{
|
||||
+ int seg;
|
||||
+ int old, new;
|
||||
+ int bin = prandom_u32_max(MEMCG_NR_BINS);
|
||||
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
+
|
||||
+ spin_lock(&pgdat->memcg_lru.lock);
|
||||
@ -723,12 +743,13 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
|
||||
+
|
||||
+ spin_unlock(&pgdat->memcg_lru.lock);
|
||||
}
|
||||
+}
|
||||
+#endif
|
||||
|
||||
+
|
||||
/******************************************************************************
|
||||
* state change
|
||||
@@ -5647,11 +5870,11 @@ static int run_cmd(char cmd, int memcg_i
|
||||
******************************************************************************/
|
||||
@@ -5644,11 +5868,11 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
|
||||
|
||||
if (!mem_cgroup_disabled()) {
|
||||
rcu_read_lock();
|
||||
@ -743,7 +764,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!memcg)
|
||||
@@ -5799,6 +6022,19 @@ void lru_gen_init_lruvec(struct lruvec *
|
||||
@@ -5796,6 +6020,19 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
@ -763,7 +784,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
INIT_LIST_HEAD(&memcg->mm_list.fifo);
|
||||
@@ -5822,7 +6058,69 @@ void lru_gen_exit_memcg(struct mem_cgrou
|
||||
@@ -5819,7 +6056,69 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -773,7 +794,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+{
|
||||
+ int gen;
|
||||
+ int nid;
|
||||
+ int bin = prandom_u32_max(MEMCG_NR_BINS);
|
||||
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
@ -834,7 +855,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
static int __init init_lru_gen(void)
|
||||
{
|
||||
@@ -5849,6 +6147,10 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5846,6 +6145,10 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
{
|
||||
}
|
||||
|
||||
@ -845,7 +866,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
#endif /* CONFIG_LRU_GEN */
|
||||
|
||||
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
@@ -5862,7 +6164,7 @@ static void shrink_lruvec(struct lruvec
|
||||
@@ -5859,7 +6162,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
bool proportional_reclaim;
|
||||
struct blk_plug plug;
|
||||
|
||||
@ -854,7 +875,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
lru_gen_shrink_lruvec(lruvec, sc);
|
||||
return;
|
||||
}
|
||||
@@ -6105,6 +6407,11 @@ static void shrink_node(pg_data_t *pgdat
|
||||
@@ -6102,6 +6405,11 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
||||
struct lruvec *target_lruvec;
|
||||
bool reclaimable = false;
|
||||
|
||||
@ -866,3 +887,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
|
||||
again:
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,7 @@
|
||||
From 93147736b5b3a21bea24313bfc7a696829932009 Mon Sep 17 00:00:00 2001
|
||||
From 11b14ee8cbbbebd8204609076a9327a1171cd253 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:19:05 -0700
|
||||
Subject: [PATCH 27/29] mm: multi-gen LRU: clarify scan_control flags
|
||||
Subject: [PATCH 07/19] BACKPORT: mm: multi-gen LRU: clarify scan_control flags
|
||||
|
||||
Among the flags in scan_control:
|
||||
1. sc->may_swap, which indicates swap constraint due to memsw.max, is
|
||||
@ -12,7 +12,7 @@ Among the flags in scan_control:
|
||||
3. !(sc->gfp_mask & __GFP_IO), which indicates IO constraint, lowers
|
||||
swappiness to prioritize file LRU, since clean file folios are more
|
||||
likely to exist.
|
||||
4. sc->may_writefolio and sc->may_unmap, which indicates opportunistic
|
||||
4. sc->may_writepage and sc->may_unmap, which indicates opportunistic
|
||||
reclaim, are rejected, since unmapped clean folios are already
|
||||
prioritized. Scanning for more of them is likely futile and can
|
||||
cause high reclaim latency when there is a large number of memcgs.
|
||||
@ -29,13 +29,21 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit e9d4e1ee788097484606c32122f146d802a9c5fb)
|
||||
[TJ: Resolved conflict with older function signature for mem_cgroup_below_min, and over
|
||||
cdded861182142ac4488a4d64c571107aeb77f53 ("ANDROID: MGLRU: Don't skip anon reclaim if swap low")]
|
||||
Change-Id: Ic2e779eaf4e91a3921831b4e2fa10c740dc59d50
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 55 +++++++++++++++++++++++++++--------------------------
|
||||
1 file changed, 28 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 65eb28448f216..0a0e1250ffc87 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3185,6 +3185,9 @@ static int get_swappiness(struct lruvec
|
||||
@@ -3185,6 +3185,9 @@ static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
|
||||
@ -45,7 +53,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
if (!can_demote(pgdat->node_id, sc) &&
|
||||
mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH)
|
||||
return 0;
|
||||
@@ -4226,7 +4229,7 @@ static void walk_mm(struct lruvec *lruve
|
||||
@@ -4223,7 +4226,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
|
||||
} while (err == -EAGAIN);
|
||||
}
|
||||
|
||||
@ -54,7 +62,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
{
|
||||
struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk;
|
||||
|
||||
@@ -4234,7 +4237,7 @@ static struct lru_gen_mm_walk *set_mm_wa
|
||||
@@ -4231,7 +4234,7 @@ static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat)
|
||||
VM_WARN_ON_ONCE(walk);
|
||||
|
||||
walk = &pgdat->mm_walk;
|
||||
@ -63,7 +71,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
VM_WARN_ON_ONCE(current_is_kswapd());
|
||||
|
||||
walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
||||
@@ -4420,7 +4423,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
@@ -4417,7 +4420,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -72,7 +80,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
if (!walk) {
|
||||
success = iterate_mm_list_nowalk(lruvec, max_seq);
|
||||
goto done;
|
||||
@@ -4489,8 +4492,6 @@ static bool lruvec_is_reclaimable(struct
|
||||
@@ -4486,8 +4489,6 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
DEFINE_MIN_SEQ(lruvec);
|
||||
|
||||
@ -81,7 +89,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/* see the comment on lru_gen_folio */
|
||||
gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
|
||||
birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
|
||||
@@ -4746,12 +4747,8 @@ static bool isolate_folio(struct lruvec
|
||||
@@ -4743,12 +4744,8 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
|
||||
{
|
||||
bool success;
|
||||
|
||||
@ -95,19 +103,19 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(folio_test_dirty(folio) ||
|
||||
(folio_test_anon(folio) && !folio_test_swapcache(folio))))
|
||||
return false;
|
||||
@@ -4848,9 +4845,8 @@ static int scan_folios(struct lruvec *lr
|
||||
@@ -4845,9 +4842,8 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
|
||||
__count_vm_events(PGSCAN_ANON + type, isolated);
|
||||
|
||||
/*
|
||||
- * There might not be eligible pages due to reclaim_idx, may_unmap and
|
||||
- * may_writepage. Check the remaining to prevent livelock if it's not
|
||||
- * making progress.
|
||||
+ * There might not be eligible pages due to reclaim_idx. Check the
|
||||
+ * There might not be eligible folios due to reclaim_idx. Check the
|
||||
+ * remaining to prevent livelock if it's not making progress.
|
||||
*/
|
||||
return isolated || !remaining ? scanned : 0;
|
||||
}
|
||||
@@ -5110,8 +5106,7 @@ static long get_nr_to_scan(struct lruvec
|
||||
@@ -5107,8 +5103,7 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
|
||||
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
DEFINE_MAX_SEQ(lruvec);
|
||||
|
||||
@ -117,7 +125,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return 0;
|
||||
|
||||
if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
|
||||
@@ -5139,17 +5134,14 @@ static bool try_to_shrink_lruvec(struct
|
||||
@@ -5136,17 +5131,14 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
long nr_to_scan;
|
||||
unsigned long scanned = 0;
|
||||
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
|
||||
@ -140,7 +148,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
|
||||
if (nr_to_scan <= 0)
|
||||
@@ -5279,12 +5271,13 @@ static void lru_gen_shrink_lruvec(struct
|
||||
@@ -5277,12 +5269,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
|
||||
struct blk_plug plug;
|
||||
|
||||
VM_WARN_ON_ONCE(global_reclaim(sc));
|
||||
@ -155,7 +163,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
|
||||
if (try_to_shrink_lruvec(lruvec, sc))
|
||||
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_YOUNG);
|
||||
@@ -5340,11 +5333,19 @@ static void lru_gen_shrink_node(struct p
|
||||
@@ -5338,11 +5331,19 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
|
||||
|
||||
VM_WARN_ON_ONCE(!global_reclaim(sc));
|
||||
|
||||
@ -172,11 +180,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
blk_start_plug(&plug);
|
||||
|
||||
- set_mm_walk(pgdat);
|
||||
+ set_mm_walk(NULL, sc->proactive);
|
||||
+ set_mm_walk(pgdat, sc->proactive);
|
||||
|
||||
set_initial_priority(pgdat, sc);
|
||||
|
||||
@@ -5362,7 +5363,7 @@ static void lru_gen_shrink_node(struct p
|
||||
@@ -5360,7 +5361,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
|
||||
clear_mm_walk();
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
@ -185,7 +193,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/* kswapd should never fail */
|
||||
pgdat->kswapd_failures = 0;
|
||||
}
|
||||
@@ -5934,7 +5935,7 @@ static ssize_t lru_gen_seq_write(struct
|
||||
@@ -5932,7 +5933,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
|
||||
set_task_reclaim_state(current, &sc.reclaim_state);
|
||||
flags = memalloc_noreclaim_save();
|
||||
blk_start_plug(&plug);
|
||||
@ -194,3 +202,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
err = -ENOMEM;
|
||||
goto done;
|
||||
}
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,8 +1,8 @@
|
||||
From cf3297e4c7a928da8b2b2f0baff2f9c69ea57952 Mon Sep 17 00:00:00 2001
|
||||
From 25887d48dff860751a06caa4188bfaf6bfb6e4b2 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Wed, 21 Dec 2022 21:19:06 -0700
|
||||
Subject: [PATCH 28/29] mm: multi-gen LRU: simplify arch_has_hw_pte_young()
|
||||
check
|
||||
Subject: [PATCH 08/19] UPSTREAM: mm: multi-gen LRU: simplify
|
||||
arch_has_hw_pte_young() check
|
||||
|
||||
Scanning page tables when hardware does not set the accessed bit has
|
||||
no real use cases.
|
||||
@ -17,13 +17,19 @@ Cc: Mike Rapoport <rppt@kernel.org>
|
||||
Cc: Roman Gushchin <roman.gushchin@linux.dev>
|
||||
Cc: Suren Baghdasaryan <surenb@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit f386e9314025ea99dae639ed2032560a92081430)
|
||||
Change-Id: I84d97ab665b4e3bb862a9bc7d72f50dea7191a6b
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 0a0e1250ffc87..aa9746f2bc80b 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4418,7 +4418,7 @@ static bool try_to_inc_max_seq(struct lr
|
||||
@@ -4415,7 +4415,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
* handful of PTEs. Spreading the work out over a period of time usually
|
||||
* is less efficient, but it avoids bursty page faults.
|
||||
*/
|
||||
@ -32,3 +38,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
success = iterate_mm_list_nowalk(lruvec, max_seq);
|
||||
goto done;
|
||||
}
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,7 @@
|
||||
From cc67f962cc53f6e1dfa92eb85b7b26fe83a3c66f Mon Sep 17 00:00:00 2001
|
||||
From 620b0ee94455e48d124414cd06d8a53f69fb6453 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Mon, 13 Feb 2023 00:53:22 -0700
|
||||
Subject: [PATCH 29/29] mm: multi-gen LRU: avoid futile retries
|
||||
Subject: [PATCH 09/19] UPSTREAM: mm: multi-gen LRU: avoid futile retries
|
||||
|
||||
Recall that the per-node memcg LRU has two generations and they alternate
|
||||
when the last memcg (of a given node) is moved from one to the other.
|
||||
@ -19,13 +19,19 @@ Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Reported-by: T.J. Mercier <tjmercier@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Bug: 274865848
|
||||
(cherry picked from commit 9f550d78b40da21b4da515db4c37d8d7b12aa1a6)
|
||||
Change-Id: Ie92535676b005ec9e7987632b742fdde8d54436f
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 25 +++++++++++++++----------
|
||||
1 file changed, 15 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index aa9746f2bc80b..49da02f841c81 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -5208,18 +5208,20 @@ static int shrink_one(struct lruvec *lru
|
||||
@@ -5206,18 +5206,20 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
|
||||
|
||||
static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
{
|
||||
@ -41,14 +47,14 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
- struct mem_cgroup *memcg = NULL;
|
||||
unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
|
||||
|
||||
bin = first_bin = prandom_u32_max(MEMCG_NR_BINS);
|
||||
bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
restart:
|
||||
+ op = 0;
|
||||
+ memcg = NULL;
|
||||
gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -5243,14 +5245,22 @@ restart:
|
||||
@@ -5241,14 +5243,22 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
|
||||
op = shrink_one(lruvec, sc);
|
||||
|
||||
@ -74,7 +80,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
/* restart if raced with lru_gen_rotate_memcg() */
|
||||
if (gen != get_nulls_value(pos))
|
||||
goto restart;
|
||||
@@ -5259,11 +5269,6 @@ restart:
|
||||
@@ -5257,11 +5267,6 @@ static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
bin = get_memcg_bin(bin + 1);
|
||||
if (bin != first_bin)
|
||||
goto restart;
|
||||
@ -86,3 +92,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,7 @@
|
||||
From 6c7f552a48b49a8612786a28a2239fbc24fac289 Mon Sep 17 00:00:00 2001
|
||||
From 70d216c71ff5c5b17dd1da6294f97b91fb6aba7a Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Fri, 30 Dec 2022 14:52:51 -0700
|
||||
Subject: [PATCH 19/29] mm: add vma_has_recency()
|
||||
Subject: [PATCH 10/19] UPSTREAM: mm: add vma_has_recency()
|
||||
|
||||
Add vma_has_recency() to indicate whether a VMA may exhibit temporal
|
||||
locality that the LRU algorithm relies on.
|
||||
@ -43,22 +43,28 @@ results are available in that thread.
|
||||
[1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/
|
||||
|
||||
Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com
|
||||
Change-Id: I291dcb795197659e40e46539cd32b857677c34ad
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
|
||||
Cc: Andrea Righi <andrea.righi@canonical.com>
|
||||
Cc: Johannes Weiner <hannes@cmpxchg.org>
|
||||
Cc: Michael Larabel <Michael@MichaelLarabel.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit 8788f6781486769d9598dcaedc3fe0eb12fc3e59)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
include/linux/mm_inline.h | 9 +++++++++
|
||||
mm/memory.c | 8 ++++----
|
||||
include/linux/mm_inline.h | 8 ++++++++
|
||||
mm/memory.c | 7 +++----
|
||||
mm/rmap.c | 42 +++++++++++++++++----------------------
|
||||
mm/vmscan.c | 5 ++++-
|
||||
4 files changed, 35 insertions(+), 29 deletions(-)
|
||||
4 files changed, 33 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
|
||||
index c1fd3922dc5dd..7bb2e5f94734c 100644
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -578,4 +578,12 @@ pte_install_uffd_wp_if_needed(struct vm_
|
||||
@@ -595,4 +595,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -71,9 +77,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
+}
|
||||
+
|
||||
#endif
|
||||
diff --git a/mm/memory.c b/mm/memory.c
|
||||
index 747b7ea30f890..c2f48f8003c2e 100644
|
||||
--- a/mm/memory.c
|
||||
+++ b/mm/memory.c
|
||||
@@ -1435,8 +1435,7 @@ again:
|
||||
@@ -1435,8 +1435,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
|
||||
force_flush = 1;
|
||||
set_page_dirty(page);
|
||||
}
|
||||
@ -83,7 +91,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
mark_page_accessed(page);
|
||||
}
|
||||
rss[mm_counter(page)]--;
|
||||
@@ -5170,8 +5169,8 @@ static inline void mm_account_fault(stru
|
||||
@@ -5170,8 +5169,8 @@ static inline void mm_account_fault(struct pt_regs *regs,
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
static void lru_gen_enter_fault(struct vm_area_struct *vma)
|
||||
{
|
||||
@ -94,9 +102,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
}
|
||||
|
||||
static void lru_gen_exit_fault(void)
|
||||
diff --git a/mm/rmap.c b/mm/rmap.c
|
||||
index 7da2d8d097d9b..825dac3caa1e5 100644
|
||||
--- a/mm/rmap.c
|
||||
+++ b/mm/rmap.c
|
||||
@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct
|
||||
@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct folio *folio,
|
||||
}
|
||||
|
||||
if (pvmw.pte) {
|
||||
@ -125,16 +135,16 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
|
||||
if (pmdp_clear_flush_young_notify(vma, address,
|
||||
pvmw.pmd))
|
||||
@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma
|
||||
@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
|
||||
struct folio_referenced_arg *pra = arg;
|
||||
struct mem_cgroup *memcg = pra->memcg;
|
||||
|
||||
- if (!mm_match_cgroup(vma->vm_mm, memcg))
|
||||
+ /*
|
||||
+ * Ignore references from this mapping if it has no recency. If the
|
||||
+ * page has been used in another mapping, we will catch it; if this
|
||||
+ * folio has been used in another mapping, we will catch it; if this
|
||||
+ * other mapping is already gone, the unmap path will have set the
|
||||
+ * referenced flag or activated the page in zap_pte_range().
|
||||
+ * referenced flag or activated the folio in zap_pte_range().
|
||||
+ */
|
||||
+ if (!vma_has_recency(vma))
|
||||
+ return true;
|
||||
@ -147,7 +157,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio
|
||||
@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio, int is_locked,
|
||||
.arg = (void *)&pra,
|
||||
.anon_lock = folio_lock_anon_vma_read,
|
||||
.try_lock = true,
|
||||
@ -155,7 +165,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
};
|
||||
|
||||
*vm_flags = 0;
|
||||
@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio
|
||||
@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio, int is_locked,
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -171,9 +181,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
rmap_walk(folio, &rwc);
|
||||
*vm_flags = pra.vm_flags;
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 49da02f841c81..596fed6ae0439 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3766,7 +3766,10 @@ static int should_skip_vma(unsigned long
|
||||
@@ -3778,7 +3778,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
return true;
|
||||
|
||||
@ -185,3 +197,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return true;
|
||||
|
||||
if (vma == get_gate_vma(vma->vm_mm))
|
||||
--
|
||||
2.40.1
|
||||
|
@ -1,7 +1,7 @@
|
||||
From 686c3d4f71de9e0e7a27f03a5617a712385f90cd Mon Sep 17 00:00:00 2001
|
||||
From 9ca4e437a24dfc4ec6c362f319eb9850b9eca497 Mon Sep 17 00:00:00 2001
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
Date: Fri, 30 Dec 2022 14:52:52 -0700
|
||||
Subject: [PATCH 20/29] mm: support POSIX_FADV_NOREUSE
|
||||
Subject: [PATCH 11/19] UPSTREAM: mm: support POSIX_FADV_NOREUSE
|
||||
|
||||
This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU
|
||||
algorithm can ignore access to mapped files marked by this flag.
|
||||
@ -22,7 +22,7 @@ Its limitations are:
|
||||
by two users and one of them having set POSIX_FADV_NOREUSE on the
|
||||
file, this page will be activated upon the second user accessing
|
||||
it. This corner case can be covered by checking POSIX_FADV_NOREUSE
|
||||
before calling mark_page_accessed() on the read path. But it is
|
||||
before calling folio_mark_accessed() on the read path. But it is
|
||||
considered not worth the effort.
|
||||
|
||||
There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1].
|
||||
@ -67,32 +67,40 @@ which makes it on par with the active/inactive LRU.
|
||||
[2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57
|
||||
|
||||
Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com
|
||||
Change-Id: I0b7f5f971d78014ea1ba44cee6a8ec902a4330d0
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
|
||||
Cc: Andrea Righi <andrea.righi@canonical.com>
|
||||
Cc: Johannes Weiner <hannes@cmpxchg.org>
|
||||
Cc: Michael Larabel <Michael@MichaelLarabel.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit 17e810229cb3068b692fa078bd9b3a6527e0866a)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
include/linux/fs.h | 2 ++
|
||||
include/linux/mm_inline.h | 3 +++
|
||||
mm/fadvise.c | 5 ++++-
|
||||
3 files changed, 9 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
||||
index f14ecbeab2a9d..97f9c41c1a43a 100644
|
||||
--- a/include/linux/fs.h
|
||||
+++ b/include/linux/fs.h
|
||||
@@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb
|
||||
@@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
|
||||
/* File supports DIRECT IO */
|
||||
#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000)
|
||||
|
||||
+#define FMODE_NOREUSE ((__force fmode_t)0x400000)
|
||||
+#define FMODE_NOREUSE ((__force fmode_t)0x800000)
|
||||
+
|
||||
/* File was opened by fanotify and shouldn't generate fanotify events */
|
||||
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
|
||||
|
||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
|
||||
index 7bb2e5f94734c..9a8e2049333c0 100644
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -583,6 +583,9 @@ static inline bool vma_has_recency(struc
|
||||
@@ -600,6 +600,9 @@ static inline bool vma_has_recency(struct vm_area_struct *vma)
|
||||
if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
|
||||
return false;
|
||||
|
||||
@ -102,9 +110,11 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
return true;
|
||||
}
|
||||
|
||||
diff --git a/mm/fadvise.c b/mm/fadvise.c
|
||||
index c76ee665355a4..2ba24d865bf5f 100644
|
||||
--- a/mm/fadvise.c
|
||||
+++ b/mm/fadvise.c
|
||||
@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, l
|
||||
@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
|
||||
case POSIX_FADV_NORMAL:
|
||||
file->f_ra.ra_pages = bdi->ra_pages;
|
||||
spin_lock(&file->f_lock);
|
||||
@ -113,7 +123,7 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
spin_unlock(&file->f_lock);
|
||||
break;
|
||||
case POSIX_FADV_RANDOM:
|
||||
@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, l
|
||||
@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
|
||||
force_page_cache_readahead(mapping, file, start_index, nrpages);
|
||||
break;
|
||||
case POSIX_FADV_NOREUSE:
|
||||
@ -123,3 +133,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
break;
|
||||
case POSIX_FADV_DONTNEED:
|
||||
__filemap_fdatawrite_range(mapping, offset, endbyte,
|
||||
--
|
||||
2.40.1
|
||||
|
@ -0,0 +1,74 @@
|
||||
From 1b5e4c317d80f4826eceb3781702d18d06b14394 Mon Sep 17 00:00:00 2001
|
||||
From: "T.J. Alumbaugh" <talumbau@google.com>
|
||||
Date: Wed, 18 Jan 2023 00:18:21 +0000
|
||||
Subject: [PATCH 12/19] UPSTREAM: mm: multi-gen LRU: section for working set
|
||||
protection
|
||||
|
||||
Patch series "mm: multi-gen LRU: improve".
|
||||
|
||||
This patch series improves a few MGLRU functions, collects related
|
||||
functions, and adds additional documentation.
|
||||
|
||||
This patch (of 7):
|
||||
|
||||
Add a section for working set protection in the code and the design doc.
|
||||
The admin doc already contains its usage.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-1-talumbau@google.com
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-2-talumbau@google.com
|
||||
Change-Id: I65599075fd42951db7739a2ab7cee78516e157b3
|
||||
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
|
||||
Cc: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit 7b8144e63d84716f16a1b929e0c7e03ae5c4d5c1)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
Documentation/mm/multigen_lru.rst | 15 +++++++++++++++
|
||||
mm/vmscan.c | 4 ++++
|
||||
2 files changed, 19 insertions(+)
|
||||
|
||||
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
|
||||
index d8f721f98868a..6e1483e70fdca 100644
|
||||
--- a/Documentation/mm/multigen_lru.rst
|
||||
+++ b/Documentation/mm/multigen_lru.rst
|
||||
@@ -141,6 +141,21 @@ loop has detected outlying refaults from the tier this page is in. To
|
||||
this end, the feedback loop uses the first tier as the baseline, for
|
||||
the reason stated earlier.
|
||||
|
||||
+Working set protection
|
||||
+----------------------
|
||||
+Each generation is timestamped at birth. If ``lru_gen_min_ttl`` is
|
||||
+set, an ``lruvec`` is protected from the eviction when its oldest
|
||||
+generation was born within ``lru_gen_min_ttl`` milliseconds. In other
|
||||
+words, it prevents the working set of ``lru_gen_min_ttl`` milliseconds
|
||||
+from getting evicted. The OOM killer is triggered if this working set
|
||||
+cannot be kept in memory.
|
||||
+
|
||||
+This time-based approach has the following advantages:
|
||||
+
|
||||
+1. It is easier to configure because it is agnostic to applications
|
||||
+ and memory sizes.
|
||||
+2. It is more reliable because it is directly wired to the OOM killer.
|
||||
+
|
||||
Summary
|
||||
-------
|
||||
The multi-gen LRU can be disassembled into the following parts:
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 596fed6ae0439..ab0b8d3b9d88f 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4459,6 +4459,10 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
|
||||
return true;
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * working set protection
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
|
||||
{
|
||||
int gen, type, zone;
|
||||
--
|
||||
2.40.1
|
||||
|
@ -0,0 +1,64 @@
|
||||
From 5ddf9d53d375e42af49b744bd7c2f8247c6bce15 Mon Sep 17 00:00:00 2001
|
||||
From: "T.J. Alumbaugh" <talumbau@google.com>
|
||||
Date: Wed, 18 Jan 2023 00:18:22 +0000
|
||||
Subject: [PATCH 13/19] UPSTREAM: mm: multi-gen LRU: section for rmap/PT walk
|
||||
feedback
|
||||
|
||||
Add a section for lru_gen_look_around() in the code and the design doc.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-3-talumbau@google.com
|
||||
Change-Id: I5097af63f61b3b69ec2abee6cdbdc33c296df213
|
||||
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
|
||||
Cc: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit db19a43d9b3a8876552f00f656008206ef9a5efa)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
Documentation/mm/multigen_lru.rst | 14 ++++++++++++++
|
||||
mm/vmscan.c | 4 ++++
|
||||
2 files changed, 18 insertions(+)
|
||||
|
||||
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
|
||||
index 6e1483e70fdca..bd988a142bc2f 100644
|
||||
--- a/Documentation/mm/multigen_lru.rst
|
||||
+++ b/Documentation/mm/multigen_lru.rst
|
||||
@@ -156,6 +156,20 @@ This time-based approach has the following advantages:
|
||||
and memory sizes.
|
||||
2. It is more reliable because it is directly wired to the OOM killer.
|
||||
|
||||
+Rmap/PT walk feedback
|
||||
+---------------------
|
||||
+Searching the rmap for PTEs mapping each page on an LRU list (to test
|
||||
+and clear the accessed bit) can be expensive because pages from
|
||||
+different VMAs (PA space) are not cache friendly to the rmap (VA
|
||||
+space). For workloads mostly using mapped pages, searching the rmap
|
||||
+can incur the highest CPU cost in the reclaim path.
|
||||
+
|
||||
+``lru_gen_look_around()`` exploits spatial locality to reduce the
|
||||
+trips into the rmap. It scans the adjacent PTEs of a young PTE and
|
||||
+promotes hot pages. If the scan was done cacheline efficiently, it
|
||||
+adds the PMD entry pointing to the PTE table to the Bloom filter. This
|
||||
+forms a feedback loop between the eviction and the aging.
|
||||
+
|
||||
Summary
|
||||
-------
|
||||
The multi-gen LRU can be disassembled into the following parts:
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index ab0b8d3b9d88f..8fa82630240d6 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4553,6 +4553,10 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
}
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * rmap/PT walk feedback
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
/*
|
||||
* This function exploits spatial locality when shrink_folio_list() walks the
|
||||
* rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If
|
||||
--
|
||||
2.40.1
|
||||
|
@ -0,0 +1,250 @@
|
||||
From 397624e12244ec038f51cb1f178ccb7a2ec562e5 Mon Sep 17 00:00:00 2001
|
||||
From: "T.J. Alumbaugh" <talumbau@google.com>
|
||||
Date: Wed, 18 Jan 2023 00:18:23 +0000
|
||||
Subject: [PATCH 14/19] UPSTREAM: mm: multi-gen LRU: section for Bloom filters
|
||||
|
||||
Move Bloom filters code into a dedicated section. Improve the design doc
|
||||
to explain Bloom filter usage and connection between aging and eviction in
|
||||
their use.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-4-talumbau@google.com
|
||||
Change-Id: I73e866f687c1ed9f5c8538086aa39408b79897db
|
||||
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
|
||||
Cc: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit ccbbbb85945d8f0255aa9dbc1b617017e2294f2c)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
Documentation/mm/multigen_lru.rst | 16 +++
|
||||
mm/vmscan.c | 180 +++++++++++++++---------------
|
||||
2 files changed, 108 insertions(+), 88 deletions(-)
|
||||
|
||||
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
|
||||
index bd988a142bc2f..770b5d539856c 100644
|
||||
--- a/Documentation/mm/multigen_lru.rst
|
||||
+++ b/Documentation/mm/multigen_lru.rst
|
||||
@@ -170,6 +170,22 @@ promotes hot pages. If the scan was done cacheline efficiently, it
|
||||
adds the PMD entry pointing to the PTE table to the Bloom filter. This
|
||||
forms a feedback loop between the eviction and the aging.
|
||||
|
||||
+Bloom Filters
|
||||
+-------------
|
||||
+Bloom filters are a space and memory efficient data structure for set
|
||||
+membership test, i.e., test if an element is not in the set or may be
|
||||
+in the set.
|
||||
+
|
||||
+In the eviction path, specifically, in ``lru_gen_look_around()``, if a
|
||||
+PMD has a sufficient number of hot pages, its address is placed in the
|
||||
+filter. In the aging path, set membership means that the PTE range
|
||||
+will be scanned for young pages.
|
||||
+
|
||||
+Note that Bloom filters are probabilistic on set membership. If a test
|
||||
+is a false positive, the cost is an additional scan of a range of PTEs,
|
||||
+which may yield hot pages anyway. Parameters of the filter itself can
|
||||
+control the false positive rate in the limit.
|
||||
+
|
||||
Summary
|
||||
-------
|
||||
The multi-gen LRU can be disassembled into the following parts:
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 8fa82630240d6..74b4f9d660b56 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3208,6 +3208,98 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
|
||||
get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS;
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * Bloom filters
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+/*
|
||||
+ * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
|
||||
+ * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
|
||||
+ * bits in a bitmap, k is the number of hash functions and n is the number of
|
||||
+ * inserted items.
|
||||
+ *
|
||||
+ * Page table walkers use one of the two filters to reduce their search space.
|
||||
+ * To get rid of non-leaf entries that no longer have enough leaf entries, the
|
||||
+ * aging uses the double-buffering technique to flip to the other filter each
|
||||
+ * time it produces a new generation. For non-leaf entries that have enough
|
||||
+ * leaf entries, the aging carries them over to the next generation in
|
||||
+ * walk_pmd_range(); the eviction also reports them when walking the rmap
|
||||
+ * in lru_gen_look_around().
|
||||
+ *
|
||||
+ * For future optimizations:
|
||||
+ * 1. It's not necessary to keep both filters all the time. The spare one can be
|
||||
+ * freed after the RCU grace period and reallocated if needed again.
|
||||
+ * 2. And when reallocating, it's worth scaling its size according to the number
|
||||
+ * of inserted entries in the other filter, to reduce the memory overhead on
|
||||
+ * small systems and false positives on large systems.
|
||||
+ * 3. Jenkins' hash function is an alternative to Knuth's.
|
||||
+ */
|
||||
+#define BLOOM_FILTER_SHIFT 15
|
||||
+
|
||||
+static inline int filter_gen_from_seq(unsigned long seq)
|
||||
+{
|
||||
+ return seq % NR_BLOOM_FILTERS;
|
||||
+}
|
||||
+
|
||||
+static void get_item_key(void *item, int *key)
|
||||
+{
|
||||
+ u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
|
||||
+
|
||||
+ BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
|
||||
+
|
||||
+ key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
|
||||
+ key[1] = hash >> BLOOM_FILTER_SHIFT;
|
||||
+}
|
||||
+
|
||||
+static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
+{
|
||||
+ int key[2];
|
||||
+ unsigned long *filter;
|
||||
+ int gen = filter_gen_from_seq(seq);
|
||||
+
|
||||
+ filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
+ if (!filter)
|
||||
+ return true;
|
||||
+
|
||||
+ get_item_key(item, key);
|
||||
+
|
||||
+ return test_bit(key[0], filter) && test_bit(key[1], filter);
|
||||
+}
|
||||
+
|
||||
+static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
+{
|
||||
+ int key[2];
|
||||
+ unsigned long *filter;
|
||||
+ int gen = filter_gen_from_seq(seq);
|
||||
+
|
||||
+ filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
+ if (!filter)
|
||||
+ return;
|
||||
+
|
||||
+ get_item_key(item, key);
|
||||
+
|
||||
+ if (!test_bit(key[0], filter))
|
||||
+ set_bit(key[0], filter);
|
||||
+ if (!test_bit(key[1], filter))
|
||||
+ set_bit(key[1], filter);
|
||||
+}
|
||||
+
|
||||
+static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
|
||||
+{
|
||||
+ unsigned long *filter;
|
||||
+ int gen = filter_gen_from_seq(seq);
|
||||
+
|
||||
+ filter = lruvec->mm_state.filters[gen];
|
||||
+ if (filter) {
|
||||
+ bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
|
||||
+ __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
||||
+ WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
|
||||
+}
|
||||
+
|
||||
/******************************************************************************
|
||||
* mm_struct list
|
||||
******************************************************************************/
|
||||
@@ -3333,94 +3425,6 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
|
||||
}
|
||||
#endif
|
||||
|
||||
-/*
|
||||
- * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when
|
||||
- * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of
|
||||
- * bits in a bitmap, k is the number of hash functions and n is the number of
|
||||
- * inserted items.
|
||||
- *
|
||||
- * Page table walkers use one of the two filters to reduce their search space.
|
||||
- * To get rid of non-leaf entries that no longer have enough leaf entries, the
|
||||
- * aging uses the double-buffering technique to flip to the other filter each
|
||||
- * time it produces a new generation. For non-leaf entries that have enough
|
||||
- * leaf entries, the aging carries them over to the next generation in
|
||||
- * walk_pmd_range(); the eviction also report them when walking the rmap
|
||||
- * in lru_gen_look_around().
|
||||
- *
|
||||
- * For future optimizations:
|
||||
- * 1. It's not necessary to keep both filters all the time. The spare one can be
|
||||
- * freed after the RCU grace period and reallocated if needed again.
|
||||
- * 2. And when reallocating, it's worth scaling its size according to the number
|
||||
- * of inserted entries in the other filter, to reduce the memory overhead on
|
||||
- * small systems and false positives on large systems.
|
||||
- * 3. Jenkins' hash function is an alternative to Knuth's.
|
||||
- */
|
||||
-#define BLOOM_FILTER_SHIFT 15
|
||||
-
|
||||
-static inline int filter_gen_from_seq(unsigned long seq)
|
||||
-{
|
||||
- return seq % NR_BLOOM_FILTERS;
|
||||
-}
|
||||
-
|
||||
-static void get_item_key(void *item, int *key)
|
||||
-{
|
||||
- u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
|
||||
-
|
||||
- BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
|
||||
-
|
||||
- key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
|
||||
- key[1] = hash >> BLOOM_FILTER_SHIFT;
|
||||
-}
|
||||
-
|
||||
-static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
|
||||
-{
|
||||
- unsigned long *filter;
|
||||
- int gen = filter_gen_from_seq(seq);
|
||||
-
|
||||
- filter = lruvec->mm_state.filters[gen];
|
||||
- if (filter) {
|
||||
- bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
|
||||
- __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
|
||||
- WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
|
||||
-}
|
||||
-
|
||||
-static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
-{
|
||||
- int key[2];
|
||||
- unsigned long *filter;
|
||||
- int gen = filter_gen_from_seq(seq);
|
||||
-
|
||||
- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
- if (!filter)
|
||||
- return;
|
||||
-
|
||||
- get_item_key(item, key);
|
||||
-
|
||||
- if (!test_bit(key[0], filter))
|
||||
- set_bit(key[0], filter);
|
||||
- if (!test_bit(key[1], filter))
|
||||
- set_bit(key[1], filter);
|
||||
-}
|
||||
-
|
||||
-static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
|
||||
-{
|
||||
- int key[2];
|
||||
- unsigned long *filter;
|
||||
- int gen = filter_gen_from_seq(seq);
|
||||
-
|
||||
- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
|
||||
- if (!filter)
|
||||
- return true;
|
||||
-
|
||||
- get_item_key(item, key);
|
||||
-
|
||||
- return test_bit(key[0], filter) && test_bit(key[1], filter);
|
||||
-}
|
||||
-
|
||||
static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
|
||||
{
|
||||
int i;
|
||||
--
|
||||
2.40.1
|
||||
|
@ -0,0 +1,440 @@
|
||||
From 48c916b812652f9453be5bd45a703728926d41ca Mon Sep 17 00:00:00 2001
|
||||
From: "T.J. Alumbaugh" <talumbau@google.com>
|
||||
Date: Wed, 18 Jan 2023 00:18:24 +0000
|
||||
Subject: [PATCH 15/19] UPSTREAM: mm: multi-gen LRU: section for memcg LRU
|
||||
|
||||
Move memcg LRU code into a dedicated section. Improve the design doc to
|
||||
outline its architecture.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-5-talumbau@google.com
|
||||
Change-Id: Id252e420cff7a858acb098cf2b3642da5c40f602
|
||||
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
|
||||
Cc: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit 36c7b4db7c942ae9e1b111f0c6b468c8b2e33842)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
Documentation/mm/multigen_lru.rst | 33 +++-
|
||||
include/linux/mm_inline.h | 17 --
|
||||
include/linux/mmzone.h | 13 +-
|
||||
mm/memcontrol.c | 8 +-
|
||||
mm/vmscan.c | 250 +++++++++++++++++-------------
|
||||
5 files changed, 178 insertions(+), 143 deletions(-)
|
||||
|
||||
diff --git a/Documentation/mm/multigen_lru.rst b/Documentation/mm/multigen_lru.rst
|
||||
index 770b5d539856c..5f1f6ecbb79b9 100644
|
||||
--- a/Documentation/mm/multigen_lru.rst
|
||||
+++ b/Documentation/mm/multigen_lru.rst
|
||||
@@ -186,9 +186,40 @@ is false positive, the cost is an additional scan of a range of PTEs,
|
||||
which may yield hot pages anyway. Parameters of the filter itself can
|
||||
control the false positive rate in the limit.
|
||||
|
||||
+Memcg LRU
|
||||
+---------
|
||||
+An memcg LRU is a per-node LRU of memcgs. It is also an LRU of LRUs,
|
||||
+since each node and memcg combination has an LRU of folios (see
|
||||
+``mem_cgroup_lruvec()``). Its goal is to improve the scalability of
|
||||
+global reclaim, which is critical to system-wide memory overcommit in
|
||||
+data centers. Note that memcg LRU only applies to global reclaim.
|
||||
+
|
||||
+The basic structure of an memcg LRU can be understood by an analogy to
|
||||
+the active/inactive LRU (of folios):
|
||||
+
|
||||
+1. It has the young and the old (generations), i.e., the counterparts
|
||||
+ to the active and the inactive;
|
||||
+2. The increment of ``max_seq`` triggers promotion, i.e., the
|
||||
+ counterpart to activation;
|
||||
+3. Other events trigger similar operations, e.g., offlining an memcg
|
||||
+ triggers demotion, i.e., the counterpart to deactivation.
|
||||
+
|
||||
+In terms of global reclaim, it has two distinct features:
|
||||
+
|
||||
+1. Sharding, which allows each thread to start at a random memcg (in
|
||||
+ the old generation) and improves parallelism;
|
||||
+2. Eventual fairness, which allows direct reclaim to bail out at will
|
||||
+ and reduces latency without affecting fairness over some time.
|
||||
+
|
||||
+In terms of traversing memcgs during global reclaim, it improves the
|
||||
+best-case complexity from O(n) to O(1) and does not affect the
|
||||
+worst-case complexity O(n). Therefore, on average, it has a sublinear
|
||||
+complexity.
|
||||
+
|
||||
Summary
|
||||
-------
|
||||
-The multi-gen LRU can be disassembled into the following parts:
|
||||
+The multi-gen LRU (of folios) can be disassembled into the following
|
||||
+parts:
|
||||
|
||||
* Generations
|
||||
* Rmap walks
|
||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
|
||||
index 9a8e2049333c0..5567f4850243b 100644
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -122,18 +122,6 @@ static inline bool lru_gen_in_fault(void)
|
||||
return current->in_lru_fault;
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_MEMCG
|
||||
-static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
|
||||
-{
|
||||
- return READ_ONCE(lruvec->lrugen.seg);
|
||||
-}
|
||||
-#else
|
||||
-static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
static inline int lru_gen_from_seq(unsigned long seq)
|
||||
{
|
||||
return seq % MAX_NR_GENS;
|
||||
@@ -309,11 +297,6 @@ static inline bool lru_gen_in_fault(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline int lru_gen_memcg_seg(struct lruvec *lruvec)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming)
|
||||
{
|
||||
return false;
|
||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
||||
index 66e067a635682..403c7461e7a70 100644
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -368,15 +368,6 @@ struct page_vma_mapped_walk;
|
||||
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
|
||||
#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
|
||||
|
||||
-/* see the comment on MEMCG_NR_GENS */
|
||||
-enum {
|
||||
- MEMCG_LRU_NOP,
|
||||
- MEMCG_LRU_HEAD,
|
||||
- MEMCG_LRU_TAIL,
|
||||
- MEMCG_LRU_OLD,
|
||||
- MEMCG_LRU_YOUNG,
|
||||
-};
|
||||
-
|
||||
#ifdef CONFIG_LRU_GEN
|
||||
|
||||
enum {
|
||||
@@ -557,7 +548,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg);
|
||||
void lru_gen_online_memcg(struct mem_cgroup *memcg);
|
||||
void lru_gen_offline_memcg(struct mem_cgroup *memcg);
|
||||
void lru_gen_release_memcg(struct mem_cgroup *memcg);
|
||||
-void lru_gen_rotate_memcg(struct lruvec *lruvec, int op);
|
||||
+void lru_gen_soft_reclaim(struct lruvec *lruvec);
|
||||
|
||||
#else /* !CONFIG_MEMCG */
|
||||
|
||||
@@ -608,7 +599,7 @@ static inline void lru_gen_release_memcg(struct mem_cgroup *memcg)
|
||||
{
|
||||
}
|
||||
|
||||
-static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
|
||||
+static inline void lru_gen_soft_reclaim(struct lruvec *lruvec)
|
||||
{
|
||||
}
|
||||
|
||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
|
||||
index 7815d556e38cc..5397aeb43986d 100644
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -478,12 +478,8 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
|
||||
struct mem_cgroup_tree_per_node *mctz;
|
||||
|
||||
if (lru_gen_enabled()) {
|
||||
- struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
|
||||
-
|
||||
- /* see the comment on MEMCG_NR_GENS */
|
||||
- if (soft_limit_excess(memcg) && lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
|
||||
- lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
|
||||
-
|
||||
+ if (soft_limit_excess(memcg))
|
||||
+ lru_gen_soft_reclaim(&memcg->nodeinfo[nid]->lruvec);
|
||||
return;
|
||||
}
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 74b4f9d660b56..ccde215c084ca 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4689,6 +4689,148 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
mem_cgroup_unlock_pages();
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * memcg LRU
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+/* see the comment on MEMCG_NR_GENS */
|
||||
+enum {
|
||||
+ MEMCG_LRU_NOP,
|
||||
+ MEMCG_LRU_HEAD,
|
||||
+ MEMCG_LRU_TAIL,
|
||||
+ MEMCG_LRU_OLD,
|
||||
+ MEMCG_LRU_YOUNG,
|
||||
+};
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+
|
||||
+static int lru_gen_memcg_seg(struct lruvec *lruvec)
|
||||
+{
|
||||
+ return READ_ONCE(lruvec->lrugen.seg);
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
|
||||
+{
|
||||
+ int seg;
|
||||
+ int old, new;
|
||||
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
+
|
||||
+ spin_lock(&pgdat->memcg_lru.lock);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
|
||||
+
|
||||
+ seg = 0;
|
||||
+ new = old = lruvec->lrugen.gen;
|
||||
+
|
||||
+ /* see the comment on MEMCG_NR_GENS */
|
||||
+ if (op == MEMCG_LRU_HEAD)
|
||||
+ seg = MEMCG_LRU_HEAD;
|
||||
+ else if (op == MEMCG_LRU_TAIL)
|
||||
+ seg = MEMCG_LRU_TAIL;
|
||||
+ else if (op == MEMCG_LRU_OLD)
|
||||
+ new = get_memcg_gen(pgdat->memcg_lru.seq);
|
||||
+ else if (op == MEMCG_LRU_YOUNG)
|
||||
+ new = get_memcg_gen(pgdat->memcg_lru.seq + 1);
|
||||
+ else
|
||||
+ VM_WARN_ON_ONCE(true);
|
||||
+
|
||||
+ hlist_nulls_del_rcu(&lruvec->lrugen.list);
|
||||
+
|
||||
+ if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
|
||||
+ hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
|
||||
+ else
|
||||
+ hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
|
||||
+
|
||||
+ pgdat->memcg_lru.nr_memcgs[old]--;
|
||||
+ pgdat->memcg_lru.nr_memcgs[new]++;
|
||||
+
|
||||
+ lruvec->lrugen.gen = new;
|
||||
+ WRITE_ONCE(lruvec->lrugen.seg, seg);
|
||||
+
|
||||
+ if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
|
||||
+ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
|
||||
+
|
||||
+ spin_unlock(&pgdat->memcg_lru.lock);
|
||||
+}
|
||||
+
|
||||
+void lru_gen_online_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ int gen;
|
||||
+ int nid;
|
||||
+ int bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ spin_lock(&pgdat->memcg_lru.lock);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
|
||||
+
|
||||
+ gen = get_memcg_gen(pgdat->memcg_lru.seq);
|
||||
+
|
||||
+ hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
|
||||
+ pgdat->memcg_lru.nr_memcgs[gen]++;
|
||||
+
|
||||
+ lruvec->lrugen.gen = gen;
|
||||
+
|
||||
+ spin_unlock(&pgdat->memcg_lru.lock);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void lru_gen_offline_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void lru_gen_release_memcg(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ int gen;
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
+ struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
+
|
||||
+ spin_lock(&pgdat->memcg_lru.lock);
|
||||
+
|
||||
+ VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
|
||||
+
|
||||
+ gen = lruvec->lrugen.gen;
|
||||
+
|
||||
+ hlist_nulls_del_rcu(&lruvec->lrugen.list);
|
||||
+ pgdat->memcg_lru.nr_memcgs[gen]--;
|
||||
+
|
||||
+ if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
|
||||
+ WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
|
||||
+
|
||||
+ spin_unlock(&pgdat->memcg_lru.lock);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void lru_gen_soft_reclaim(struct lruvec *lruvec)
|
||||
+{
|
||||
+ /* see the comment on MEMCG_NR_GENS */
|
||||
+ if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
|
||||
+ lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
|
||||
+}
|
||||
+
|
||||
+#else /* !CONFIG_MEMCG */
|
||||
+
|
||||
+static int lru_gen_memcg_seg(struct lruvec *lruvec)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
/******************************************************************************
|
||||
* the eviction
|
||||
******************************************************************************/
|
||||
@@ -5386,53 +5528,6 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
|
||||
pgdat->kswapd_failures = 0;
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_MEMCG
|
||||
-void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
|
||||
-{
|
||||
- int seg;
|
||||
- int old, new;
|
||||
- int bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
-
|
||||
- spin_lock(&pgdat->memcg_lru.lock);
|
||||
-
|
||||
- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
|
||||
-
|
||||
- seg = 0;
|
||||
- new = old = lruvec->lrugen.gen;
|
||||
-
|
||||
- /* see the comment on MEMCG_NR_GENS */
|
||||
- if (op == MEMCG_LRU_HEAD)
|
||||
- seg = MEMCG_LRU_HEAD;
|
||||
- else if (op == MEMCG_LRU_TAIL)
|
||||
- seg = MEMCG_LRU_TAIL;
|
||||
- else if (op == MEMCG_LRU_OLD)
|
||||
- new = get_memcg_gen(pgdat->memcg_lru.seq);
|
||||
- else if (op == MEMCG_LRU_YOUNG)
|
||||
- new = get_memcg_gen(pgdat->memcg_lru.seq + 1);
|
||||
- else
|
||||
- VM_WARN_ON_ONCE(true);
|
||||
-
|
||||
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
|
||||
-
|
||||
- if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
|
||||
- hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
|
||||
- else
|
||||
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
|
||||
-
|
||||
- pgdat->memcg_lru.nr_memcgs[old]--;
|
||||
- pgdat->memcg_lru.nr_memcgs[new]++;
|
||||
-
|
||||
- lruvec->lrugen.gen = new;
|
||||
- WRITE_ONCE(lruvec->lrugen.seg, seg);
|
||||
-
|
||||
- if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq))
|
||||
- WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
|
||||
-
|
||||
- spin_unlock(&pgdat->memcg_lru.lock);
|
||||
-}
|
||||
-#endif
|
||||
-
|
||||
/******************************************************************************
|
||||
* state change
|
||||
******************************************************************************/
|
||||
@@ -6078,67 +6173,6 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
}
|
||||
}
|
||||
|
||||
-void lru_gen_online_memcg(struct mem_cgroup *memcg)
|
||||
-{
|
||||
- int gen;
|
||||
- int nid;
|
||||
- int bin = get_random_u32_below(MEMCG_NR_BINS);
|
||||
-
|
||||
- for_each_node(nid) {
|
||||
- struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
- struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
-
|
||||
- spin_lock(&pgdat->memcg_lru.lock);
|
||||
-
|
||||
- VM_WARN_ON_ONCE(!hlist_nulls_unhashed(&lruvec->lrugen.list));
|
||||
-
|
||||
- gen = get_memcg_gen(pgdat->memcg_lru.seq);
|
||||
-
|
||||
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
|
||||
- pgdat->memcg_lru.nr_memcgs[gen]++;
|
||||
-
|
||||
- lruvec->lrugen.gen = gen;
|
||||
-
|
||||
- spin_unlock(&pgdat->memcg_lru.lock);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-void lru_gen_offline_memcg(struct mem_cgroup *memcg)
|
||||
-{
|
||||
- int nid;
|
||||
-
|
||||
- for_each_node(nid) {
|
||||
- struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
-
|
||||
- lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-void lru_gen_release_memcg(struct mem_cgroup *memcg)
|
||||
-{
|
||||
- int gen;
|
||||
- int nid;
|
||||
-
|
||||
- for_each_node(nid) {
|
||||
- struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
- struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
-
|
||||
- spin_lock(&pgdat->memcg_lru.lock);
|
||||
-
|
||||
- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
|
||||
-
|
||||
- gen = lruvec->lrugen.gen;
|
||||
-
|
||||
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
|
||||
- pgdat->memcg_lru.nr_memcgs[gen]--;
|
||||
-
|
||||
- if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
|
||||
- WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
|
||||
-
|
||||
- spin_unlock(&pgdat->memcg_lru.lock);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
#endif /* CONFIG_MEMCG */
|
||||
|
||||
static int __init init_lru_gen(void)
|
||||
--
|
||||
2.40.1
|
||||
|
@ -0,0 +1,45 @@
|
||||
From bec433f29537652ed054148edfd7e2183ddcf7c3 Mon Sep 17 00:00:00 2001
|
||||
From: "T.J. Alumbaugh" <talumbau@google.com>
|
||||
Date: Wed, 18 Jan 2023 00:18:25 +0000
|
||||
Subject: [PATCH 16/19] UPSTREAM: mm: multi-gen LRU: improve
|
||||
lru_gen_exit_memcg()
|
||||
|
||||
Add warnings and poison ->next.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-6-talumbau@google.com
|
||||
Change-Id: I53de9e04c1ae941e122b33cd45d2bbb5f34aae0c
|
||||
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
|
||||
Cc: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit 37cc99979d04cca677c0ad5c0acd1149ec165d1b)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index ccde215c084ca..d5d6f8d94f58a 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -6160,12 +6160,17 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg)
|
||||
int i;
|
||||
int nid;
|
||||
|
||||
+ VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo));
|
||||
+
|
||||
for_each_node(nid) {
|
||||
struct lruvec *lruvec = get_lruvec(memcg, nid);
|
||||
|
||||
+ VM_WARN_ON_ONCE(lruvec->mm_state.nr_walkers);
|
||||
VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
|
||||
sizeof(lruvec->lrugen.nr_pages)));
|
||||
|
||||
+ lruvec->lrugen.list.next = LIST_POISON1;
|
||||
+
|
||||
for (i = 0; i < NR_BLOOM_FILTERS; i++) {
|
||||
bitmap_free(lruvec->mm_state.filters[i]);
|
||||
lruvec->mm_state.filters[i] = NULL;
|
||||
--
|
||||
2.40.1
|
||||
|
@ -0,0 +1,140 @@
|
||||
From fc0e3b06e0f19917b7ecad7967a72f61d4743644 Mon Sep 17 00:00:00 2001
|
||||
From: "T.J. Alumbaugh" <talumbau@google.com>
|
||||
Date: Wed, 18 Jan 2023 00:18:26 +0000
|
||||
Subject: [PATCH 17/19] UPSTREAM: mm: multi-gen LRU: improve walk_pmd_range()
|
||||
|
||||
Improve readability of walk_pmd_range() and walk_pmd_range_locked().
|
||||
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-7-talumbau@google.com
|
||||
Change-Id: Ia084fbf53fe989673b7804ca8ca520af12d7d52a
|
||||
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
|
||||
Cc: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit b5ff4133617d0eced35b685da0bd0929dd9fabb7)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 40 ++++++++++++++++++++--------------------
|
||||
1 file changed, 20 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index d5d6f8d94f58a..8f496c2e670a9 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -3980,8 +3980,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
|
||||
}
|
||||
|
||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
|
||||
-static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
|
||||
- struct mm_walk *args, unsigned long *bitmap, unsigned long *start)
|
||||
+static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
|
||||
+ struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
|
||||
{
|
||||
int i;
|
||||
pmd_t *pmd;
|
||||
@@ -3994,18 +3994,19 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
|
||||
VM_WARN_ON_ONCE(pud_leaf(*pud));
|
||||
|
||||
/* try to batch at most 1+MIN_LRU_BATCH+1 entries */
|
||||
- if (*start == -1) {
|
||||
- *start = next;
|
||||
+ if (*first == -1) {
|
||||
+ *first = addr;
|
||||
+ bitmap_zero(bitmap, MIN_LRU_BATCH);
|
||||
return;
|
||||
}
|
||||
|
||||
- i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start);
|
||||
+ i = addr == -1 ? 0 : pmd_index(addr) - pmd_index(*first);
|
||||
if (i && i <= MIN_LRU_BATCH) {
|
||||
__set_bit(i - 1, bitmap);
|
||||
return;
|
||||
}
|
||||
|
||||
- pmd = pmd_offset(pud, *start);
|
||||
+ pmd = pmd_offset(pud, *first);
|
||||
|
||||
ptl = pmd_lockptr(args->mm, pmd);
|
||||
if (!spin_trylock(ptl))
|
||||
@@ -4016,15 +4017,16 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
|
||||
do {
|
||||
unsigned long pfn;
|
||||
struct folio *folio;
|
||||
- unsigned long addr = i ? (*start & PMD_MASK) + i * PMD_SIZE : *start;
|
||||
+
|
||||
+ /* don't round down the first address */
|
||||
+ addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
|
||||
|
||||
pfn = get_pmd_pfn(pmd[i], vma, addr);
|
||||
if (pfn == -1)
|
||||
goto next;
|
||||
|
||||
if (!pmd_trans_huge(pmd[i])) {
|
||||
- if (arch_has_hw_nonleaf_pmd_young() &&
|
||||
- get_cap(LRU_GEN_NONLEAF_YOUNG))
|
||||
+ if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
|
||||
pmdp_test_and_clear_young(vma, addr, pmd + i);
|
||||
goto next;
|
||||
}
|
||||
@@ -4053,12 +4055,11 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
|
||||
arch_leave_lazy_mmu_mode();
|
||||
spin_unlock(ptl);
|
||||
done:
|
||||
- *start = -1;
|
||||
- bitmap_zero(bitmap, MIN_LRU_BATCH);
|
||||
+ *first = -1;
|
||||
}
|
||||
#else
|
||||
-static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma,
|
||||
- struct mm_walk *args, unsigned long *bitmap, unsigned long *start)
|
||||
+static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
|
||||
+ struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
@@ -4071,9 +4072,9 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
|
||||
unsigned long next;
|
||||
unsigned long addr;
|
||||
struct vm_area_struct *vma;
|
||||
- unsigned long pos = -1;
|
||||
+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)];
|
||||
+ unsigned long first = -1;
|
||||
struct lru_gen_mm_walk *walk = args->private;
|
||||
- unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
|
||||
|
||||
VM_WARN_ON_ONCE(pud_leaf(*pud));
|
||||
|
||||
@@ -4115,18 +4116,17 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
|
||||
if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
|
||||
continue;
|
||||
|
||||
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
|
||||
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
walk->mm_stats[MM_NONLEAF_TOTAL]++;
|
||||
|
||||
- if (arch_has_hw_nonleaf_pmd_young() &&
|
||||
- get_cap(LRU_GEN_NONLEAF_YOUNG)) {
|
||||
+ if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) {
|
||||
if (!pmd_young(val))
|
||||
continue;
|
||||
|
||||
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
|
||||
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
|
||||
}
|
||||
|
||||
if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
|
||||
@@ -4143,7 +4143,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
|
||||
update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i);
|
||||
}
|
||||
|
||||
- walk_pmd_range_locked(pud, -1, vma, args, bitmap, &pos);
|
||||
+ walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first);
|
||||
|
||||
if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end))
|
||||
goto restart;
|
||||
--
|
||||
2.40.1
|
||||
|
@ -0,0 +1,153 @@
|
||||
From e604c3ccb4dfbdde2467fccef9bb36170a392695 Mon Sep 17 00:00:00 2001
|
||||
From: "T.J. Alumbaugh" <talumbau@google.com>
|
||||
Date: Wed, 18 Jan 2023 00:18:27 +0000
|
||||
Subject: [PATCH 18/19] UPSTREAM: mm: multi-gen LRU: simplify
|
||||
lru_gen_look_around()
|
||||
|
||||
Update the folio generation in place with or without
|
||||
current->reclaim_state->mm_walk. The LRU lock is held for longer, if
|
||||
mm_walk is NULL and the number of folios to update is more than
|
||||
PAGEVEC_SIZE.
|
||||
|
||||
This causes a measurable regression from the LRU lock contention during a
|
||||
microbenchmark. But a tiny regression is not worth the complexity.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20230118001827.1040870-8-talumbau@google.com
|
||||
Change-Id: I9ce18b4f4062e6c1c13c98ece9422478eb8e1846
|
||||
Signed-off-by: T.J. Alumbaugh <talumbau@google.com>
|
||||
Cc: Yu Zhao <yuzhao@google.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
(cherry picked from commit abf086721a2f1e6897c57796f7268df1b194c750)
|
||||
Bug: 274865848
|
||||
Signed-off-by: T.J. Mercier <tjmercier@google.com>
|
||||
---
|
||||
mm/vmscan.c | 73 +++++++++++++++++------------------------------------
|
||||
1 file changed, 23 insertions(+), 50 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 8f496c2e670a9..f6ce7a1fd78a3 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -4571,13 +4571,12 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
{
|
||||
int i;
|
||||
- pte_t *pte;
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
- unsigned long addr;
|
||||
struct lru_gen_mm_walk *walk;
|
||||
int young = 0;
|
||||
- unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
|
||||
+ pte_t *pte = pvmw->pte;
|
||||
+ unsigned long addr = pvmw->address;
|
||||
struct folio *folio = pfn_folio(pvmw->pfn);
|
||||
struct mem_cgroup *memcg = folio_memcg(folio);
|
||||
struct pglist_data *pgdat = folio_pgdat(folio);
|
||||
@@ -4594,25 +4593,28 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
/* avoid taking the LRU lock under the PTL when possible */
|
||||
walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
|
||||
|
||||
- start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
|
||||
- end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
|
||||
+ start = max(addr & PMD_MASK, pvmw->vma->vm_start);
|
||||
+ end = min(addr | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
|
||||
|
||||
if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
|
||||
- if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
+ if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
end = start + MIN_LRU_BATCH * PAGE_SIZE;
|
||||
- else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
+ else if (end - addr < MIN_LRU_BATCH * PAGE_SIZE / 2)
|
||||
start = end - MIN_LRU_BATCH * PAGE_SIZE;
|
||||
else {
|
||||
- start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
- end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
+ start = addr - MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
+ end = addr + MIN_LRU_BATCH * PAGE_SIZE / 2;
|
||||
}
|
||||
}
|
||||
|
||||
- pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
|
||||
+ /* folio_update_gen() requires stable folio_memcg() */
|
||||
+ if (!mem_cgroup_trylock_pages(memcg))
|
||||
+ return;
|
||||
|
||||
- rcu_read_lock();
|
||||
arch_enter_lazy_mmu_mode();
|
||||
|
||||
+ pte -= (addr - start) / PAGE_SIZE;
|
||||
+
|
||||
for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
|
||||
unsigned long pfn;
|
||||
|
||||
@@ -4637,56 +4639,27 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
|
||||
!folio_test_swapcache(folio)))
|
||||
folio_mark_dirty(folio);
|
||||
|
||||
+ if (walk) {
|
||||
+ old_gen = folio_update_gen(folio, new_gen);
|
||||
+ if (old_gen >= 0 && old_gen != new_gen)
|
||||
+ update_batch_size(walk, folio, old_gen, new_gen);
|
||||
+
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
old_gen = folio_lru_gen(folio);
|
||||
if (old_gen < 0)
|
||||
folio_set_referenced(folio);
|
||||
else if (old_gen != new_gen)
|
||||
- __set_bit(i, bitmap);
|
||||
+ folio_activate(folio);
|
||||
}
|
||||
|
||||
arch_leave_lazy_mmu_mode();
|
||||
- rcu_read_unlock();
|
||||
+ mem_cgroup_unlock_pages();
|
||||
|
||||
/* feedback from rmap walkers to page table walkers */
|
||||
if (suitable_to_scan(i, young))
|
||||
update_bloom_filter(lruvec, max_seq, pvmw->pmd);
|
||||
-
|
||||
- if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
|
||||
- for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
|
||||
- folio = pfn_folio(pte_pfn(pte[i]));
|
||||
- folio_activate(folio);
|
||||
- }
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- /* folio_update_gen() requires stable folio_memcg() */
|
||||
- if (!mem_cgroup_trylock_pages(memcg))
|
||||
- return;
|
||||
-
|
||||
- if (!walk) {
|
||||
- spin_lock_irq(&lruvec->lru_lock);
|
||||
- new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
|
||||
- }
|
||||
-
|
||||
- for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
|
||||
- folio = pfn_folio(pte_pfn(pte[i]));
|
||||
- if (folio_memcg_rcu(folio) != memcg)
|
||||
- continue;
|
||||
-
|
||||
- old_gen = folio_update_gen(folio, new_gen);
|
||||
- if (old_gen < 0 || old_gen == new_gen)
|
||||
- continue;
|
||||
-
|
||||
- if (walk)
|
||||
- update_batch_size(walk, folio, old_gen, new_gen);
|
||||
- else
|
||||
- lru_gen_update_size(lruvec, folio, old_gen, new_gen);
|
||||
- }
|
||||
-
|
||||
- if (!walk)
|
||||
- spin_unlock_irq(&lruvec->lru_lock);
|
||||
-
|
||||
- mem_cgroup_unlock_pages();
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
--
|
||||
2.40.1
|
||||
|
Loading…
Reference in New Issue
Block a user