2022-03-29 17:10:48 +01:00
|
|
|
From 05f366c941ae2bb8ba21c79fafcb747a5a6b967b Mon Sep 17 00:00:00 2001
|
|
|
|
From: Yu Zhao <yuzhao@google.com>
|
|
|
|
Date: Mon, 25 Jan 2021 21:12:33 -0700
|
|
|
|
Subject: [PATCH 04/10] mm: multigenerational lru: groundwork
|
|
|
|
|
|
|
|
For each lruvec, evictable pages are divided into multiple
|
|
|
|
generations. The youngest generation number is stored in
|
|
|
|
lrugen->max_seq for both anon and file types as they are aged on an
|
|
|
|
equal footing. The oldest generation numbers are stored in
|
|
|
|
lrugen->min_seq[] separately for anon and file types as clean file
|
|
|
|
pages can be evicted regardless of swap constraints. These three
|
|
|
|
variables are monotonically increasing. Generation numbers are
|
|
|
|
truncated into order_base_2(MAX_NR_GENS+1) bits in order to fit into
|
|
|
|
page->flags. The sliding window technique is used to prevent truncated
|
|
|
|
generation numbers from overlapping. Each truncated generation number
|
|
|
|
is an index to
|
|
|
|
lrugen->lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES].
|
|
|
|
|
|
|
|
The framework comprises two conceptually independent components: the
|
|
|
|
aging, which produces young generations, and the eviction, which
|
|
|
|
consumes old generations. Both can be invoked independently from user
|
|
|
|
space for the purpose of working set estimation and proactive reclaim.
|
|
|
|
|
|
|
|
The protection of hot pages and the selection of cold pages are based
|
|
|
|
on page access types and patterns. There are two access types: one via
|
|
|
|
page tables and the other via file descriptors. The protection of the
|
|
|
|
former type is by design stronger because:
|
|
|
|
1) The uncertainty in determining the access patterns of the former
|
|
|
|
type is higher due to the coalesced nature of the accessed bit.
|
|
|
|
2) The cost of evicting the former type is higher due to the TLB
|
|
|
|
flushes required and the likelihood of involving I/O.
|
|
|
|
3) The penalty of under-protecting the former type is higher because
|
|
|
|
applications usually do not prepare themselves for major faults like
|
|
|
|
they do for blocked I/O. For example, client applications commonly
|
|
|
|
dedicate blocked I/O to separate threads to avoid UI janks that
|
|
|
|
negatively affect user experience.
|
|
|
|
|
|
|
|
There are also two access patterns: one with temporal locality and the
|
|
|
|
other without. The latter pattern, e.g., random and sequential, needs
|
|
|
|
to be explicitly excluded to avoid weakening the protection of the
|
|
|
|
former pattern. Generally the former type follows the former pattern
|
|
|
|
unless MADV_SEQUENTIAL is specified and the latter type follows the
|
|
|
|
latter pattern unless outlying refaults have been observed.
|
|
|
|
|
|
|
|
Upon faulting, a page is added to the youngest generation, which
|
|
|
|
provides the strongest protection as the eviction will not consider
|
|
|
|
this page before the aging has scanned it at least twice. The first
|
|
|
|
scan clears the accessed bit set during the initial fault. And the
|
|
|
|
second scan makes sure this page has not been used since the first
|
|
|
|
scan. A page from any other generation is brought back to the
|
|
|
|
youngest generation whenever the aging finds the accessed bit set on
|
|
|
|
any of the PTEs mapping this page.
|
|
|
|
|
|
|
|
Unmapped pages are initially added to the oldest generation and then
|
|
|
|
conditionally protected by tiers. This is done later [PATCH 07/10].
|
|
|
|
|
|
|
|
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
|
|
|
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
|
|
|
Change-Id: I71de7cd15b8dfa6f9fdd838023474693c4fee0a7
|
|
|
|
---
|
|
|
|
fs/fuse/dev.c | 3 +-
|
|
|
|
include/linux/cgroup.h | 15 +-
|
|
|
|
include/linux/mm.h | 36 ++++
|
|
|
|
include/linux/mm_inline.h | 182 ++++++++++++++++++++
|
|
|
|
include/linux/mmzone.h | 70 ++++++++
|
|
|
|
include/linux/page-flags-layout.h | 19 ++-
|
|
|
|
include/linux/page-flags.h | 4 +-
|
|
|
|
include/linux/sched.h | 3 +
|
|
|
|
kernel/bounds.c | 3 +
|
|
|
|
kernel/cgroup/cgroup-internal.h | 1 -
|
|
|
|
mm/huge_memory.c | 3 +-
|
|
|
|
mm/memcontrol.c | 1 +
|
|
|
|
mm/memory.c | 7 +
|
|
|
|
mm/mm_init.c | 6 +-
|
|
|
|
mm/page_alloc.c | 1 +
|
|
|
|
mm/swap.c | 9 +-
|
|
|
|
mm/swapfile.c | 2 +
|
|
|
|
mm/vmscan.c | 268 ++++++++++++++++++++++++++++++
|
|
|
|
18 files changed, 618 insertions(+), 15 deletions(-)
|
|
|
|
|
|
|
|
--- a/fs/fuse/dev.c
|
|
|
|
+++ b/fs/fuse/dev.c
|
|
|
|
@@ -785,7 +785,8 @@ static int fuse_check_page(struct page *
|
|
|
|
1 << PG_active |
|
|
|
|
1 << PG_workingset |
|
|
|
|
1 << PG_reclaim |
|
|
|
|
- 1 << PG_waiters))) {
|
|
|
|
+ 1 << PG_waiters |
|
|
|
|
+ LRU_GEN_MASK | LRU_REFS_MASK))) {
|
|
|
|
dump_page(page, "fuse: trying to steal weird page");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
--- a/include/linux/cgroup.h
|
|
|
|
+++ b/include/linux/cgroup.h
|
2022-12-14 10:11:04 -05:00
|
|
|
@@ -433,6 +433,18 @@ static inline void cgroup_put(struct cgr
|
2022-03-29 17:10:48 +01:00
|
|
|
css_put(&cgrp->self);
|
|
|
|
}
|
|
|
|
|
|
|
|
+extern struct mutex cgroup_mutex;
|
|
|
|
+
|
|
|
|
+static inline void cgroup_lock(void)
|
|
|
|
+{
|
|
|
|
+ mutex_lock(&cgroup_mutex);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline void cgroup_unlock(void)
|
|
|
|
+{
|
|
|
|
+ mutex_unlock(&cgroup_mutex);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
/**
|
|
|
|
* task_css_set_check - obtain a task's css_set with extra access conditions
|
|
|
|
* @task: the task to obtain css_set for
|
2022-12-14 10:11:04 -05:00
|
|
|
@@ -447,7 +459,6 @@ static inline void cgroup_put(struct cgr
|
2022-03-29 17:10:48 +01:00
|
|
|
* as locks used during the cgroup_subsys::attach() methods.
|
|
|
|
*/
|
|
|
|
#ifdef CONFIG_PROVE_RCU
|
|
|
|
-extern struct mutex cgroup_mutex;
|
|
|
|
extern spinlock_t css_set_lock;
|
|
|
|
#define task_css_set_check(task, __c) \
|
|
|
|
rcu_dereference_check((task)->cgroups, \
|
2022-12-14 10:11:04 -05:00
|
|
|
@@ -708,6 +719,8 @@ struct cgroup;
|
2022-03-29 17:10:48 +01:00
|
|
|
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
|
|
|
|
static inline void css_get(struct cgroup_subsys_state *css) {}
|
|
|
|
static inline void css_put(struct cgroup_subsys_state *css) {}
|
|
|
|
+static inline void cgroup_lock(void) {}
|
|
|
|
+static inline void cgroup_unlock(void) {}
|
|
|
|
static inline int cgroup_attach_task_all(struct task_struct *from,
|
|
|
|
struct task_struct *t) { return 0; }
|
|
|
|
static inline int cgroupstats_build(struct cgroupstats *stats,
|
|
|
|
--- a/include/linux/mm.h
|
|
|
|
+++ b/include/linux/mm.h
|
|
|
|
@@ -1093,6 +1093,8 @@ vm_fault_t finish_mkwrite_fault(struct v
|
|
|
|
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
|
|
|
|
#define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
|
|
|
|
#define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
|
|
|
|
+#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
|
|
|
|
+#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Define the bit shifts to access each section. For non-existent
|
|
|
|
@@ -1807,6 +1809,40 @@ static inline void unmap_mapping_range(s
|
|
|
|
loff_t const holebegin, loff_t const holelen, int even_cows) { }
|
|
|
|
#endif
|
|
|
|
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+static inline void task_enter_nonseq_fault(void)
|
|
|
|
+{
|
|
|
|
+ WARN_ON(current->in_nonseq_fault);
|
|
|
|
+
|
|
|
|
+ current->in_nonseq_fault = 1;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline void task_exit_nonseq_fault(void)
|
|
|
|
+{
|
|
|
|
+ WARN_ON(!current->in_nonseq_fault);
|
|
|
|
+
|
|
|
|
+ current->in_nonseq_fault = 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline bool task_in_nonseq_fault(void)
|
|
|
|
+{
|
|
|
|
+ return current->in_nonseq_fault;
|
|
|
|
+}
|
|
|
|
+#else
|
|
|
|
+static inline void task_enter_nonseq_fault(void)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline void task_exit_nonseq_fault(void)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline bool task_in_nonseq_fault(void)
|
|
|
|
+{
|
|
|
|
+ return false;
|
|
|
|
+}
|
|
|
|
+#endif /* CONFIG_LRU_GEN */
|
|
|
|
+
|
|
|
|
static inline void unmap_shared_mapping_range(struct address_space *mapping,
|
|
|
|
loff_t const holebegin, loff_t const holelen)
|
|
|
|
{
|
|
|
|
--- a/include/linux/mm_inline.h
|
|
|
|
+++ b/include/linux/mm_inline.h
|
|
|
|
@@ -79,11 +79,187 @@ static __always_inline enum lru_list pag
|
|
|
|
return lru;
|
|
|
|
}
|
|
|
|
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+
|
|
|
|
+static inline bool lru_gen_enabled(void)
|
|
|
|
+{
|
|
|
|
+#ifdef CONFIG_LRU_GEN_ENABLED
|
|
|
|
+ DECLARE_STATIC_KEY_TRUE(lru_gen_static_key);
|
|
|
|
+
|
|
|
|
+ return static_branch_likely(&lru_gen_static_key);
|
|
|
|
+#else
|
|
|
|
+ DECLARE_STATIC_KEY_FALSE(lru_gen_static_key);
|
|
|
|
+
|
|
|
|
+ return static_branch_unlikely(&lru_gen_static_key);
|
|
|
|
+#endif
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* Return an index within the sliding window that tracks MAX_NR_GENS generations. */
|
|
|
|
+static inline int lru_gen_from_seq(unsigned long seq)
|
|
|
|
+{
|
|
|
|
+ return seq % MAX_NR_GENS;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* The youngest and the second youngest generations are counted as active. */
|
|
|
|
+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
|
|
|
|
+{
|
|
|
|
+ unsigned long max_seq = lruvec->evictable.max_seq;
|
|
|
|
+
|
|
|
|
+ VM_BUG_ON(gen >= MAX_NR_GENS);
|
|
|
|
+
|
|
|
|
+ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* Update the sizes of the multigenerational lru lists. */
|
|
|
|
+static inline void lru_gen_update_size(struct page *page, struct lruvec *lruvec,
|
|
|
|
+ int old_gen, int new_gen)
|
|
|
|
+{
|
|
|
|
+ int type = page_is_file_lru(page);
|
|
|
|
+ int zone = page_zonenum(page);
|
|
|
|
+ int delta = thp_nr_pages(page);
|
|
|
|
+ enum lru_list lru = type * LRU_FILE;
|
|
|
|
+ struct lrugen *lrugen = &lruvec->evictable;
|
|
|
|
+
|
|
|
|
+ lockdep_assert_held(&lruvec->lru_lock);
|
|
|
|
+ VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS);
|
|
|
|
+ VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS);
|
|
|
|
+ VM_BUG_ON(old_gen == -1 && new_gen == -1);
|
|
|
|
+
|
|
|
|
+ if (old_gen >= 0)
|
|
|
|
+ WRITE_ONCE(lrugen->sizes[old_gen][type][zone],
|
|
|
|
+ lrugen->sizes[old_gen][type][zone] - delta);
|
|
|
|
+ if (new_gen >= 0)
|
|
|
|
+ WRITE_ONCE(lrugen->sizes[new_gen][type][zone],
|
|
|
|
+ lrugen->sizes[new_gen][type][zone] + delta);
|
|
|
|
+
|
|
|
|
+ if (old_gen < 0) {
|
|
|
|
+ if (lru_gen_is_active(lruvec, new_gen))
|
|
|
|
+ lru += LRU_ACTIVE;
|
|
|
|
+ update_lru_size(lruvec, lru, zone, delta);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (new_gen < 0) {
|
|
|
|
+ if (lru_gen_is_active(lruvec, old_gen))
|
|
|
|
+ lru += LRU_ACTIVE;
|
|
|
|
+ update_lru_size(lruvec, lru, zone, -delta);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) {
|
|
|
|
+ update_lru_size(lruvec, lru, zone, -delta);
|
|
|
|
+ update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* Add a page to one of the multigenerational lru lists. Return true on success. */
|
|
|
|
+static inline bool lru_gen_add_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
|
|
|
|
+{
|
|
|
|
+ int gen;
|
|
|
|
+ unsigned long old_flags, new_flags;
|
|
|
|
+ int type = page_is_file_lru(page);
|
|
|
|
+ int zone = page_zonenum(page);
|
|
|
|
+ struct lrugen *lrugen = &lruvec->evictable;
|
|
|
|
+
|
|
|
|
+ if (PageUnevictable(page) || !lrugen->enabled[type])
|
|
|
|
+ return false;
|
|
|
|
+ /*
|
|
|
|
+ * If a page shouldn't be considered for eviction, i.e., a page mapped
|
|
|
|
+ * upon fault during which the accessed bit is set, add it to the
|
|
|
|
+ * youngest generation.
|
|
|
|
+ *
|
|
|
|
+ * If a page can't be evicted immediately, i.e., an anon page not in
|
|
|
|
+ * swap cache or a dirty page pending writeback, add it to the second
|
|
|
|
+ * oldest generation.
|
|
|
|
+ *
|
|
|
|
+ * If a page could be evicted immediately, e.g., a clean page, add it to
|
|
|
|
+ * the oldest generation.
|
|
|
|
+ */
|
|
|
|
+ if (PageActive(page))
|
|
|
|
+ gen = lru_gen_from_seq(lrugen->max_seq);
|
|
|
|
+ else if ((!type && !PageSwapCache(page)) ||
|
|
|
|
+ (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))))
|
|
|
|
+ gen = lru_gen_from_seq(lrugen->min_seq[type] + 1);
|
|
|
|
+ else
|
|
|
|
+ gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+ new_flags = old_flags = READ_ONCE(page->flags);
|
|
|
|
+ VM_BUG_ON_PAGE(new_flags & LRU_GEN_MASK, page);
|
|
|
|
+
|
|
|
|
+ new_flags &= ~(LRU_GEN_MASK | BIT(PG_active));
|
|
|
|
+ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
|
|
|
|
+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
|
|
|
|
+
|
|
|
|
+ lru_gen_update_size(page, lruvec, -1, gen);
|
|
|
|
+ /* for rotate_reclaimable_page() */
|
|
|
|
+ if (reclaiming)
|
|
|
|
+ list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]);
|
|
|
|
+ else
|
|
|
|
+ list_add(&page->lru, &lrugen->lists[gen][type][zone]);
|
|
|
|
+
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/* Delete a page from one of the multigenerational lru lists. Return true on success. */
|
|
|
|
+static inline bool lru_gen_del_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
|
|
|
|
+{
|
|
|
|
+ int gen;
|
|
|
|
+ unsigned long old_flags, new_flags;
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+ new_flags = old_flags = READ_ONCE(page->flags);
|
|
|
|
+ if (!(new_flags & LRU_GEN_MASK))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ VM_BUG_ON_PAGE(PageActive(page), page);
|
|
|
|
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
|
|
|
|
+
|
|
|
|
+ gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
|
|
|
+
|
|
|
|
+ new_flags &= ~LRU_GEN_MASK;
|
|
|
|
+ /* for shrink_page_list() */
|
|
|
|
+ if (reclaiming)
|
|
|
|
+ new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
|
|
|
|
+ else if (lru_gen_is_active(lruvec, gen))
|
|
|
|
+ new_flags |= BIT(PG_active);
|
|
|
|
+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
|
|
|
|
+
|
|
|
|
+ lru_gen_update_size(page, lruvec, gen, -1);
|
|
|
|
+ list_del(&page->lru);
|
|
|
|
+
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#else
|
|
|
|
+
|
|
|
|
+static inline bool lru_gen_enabled(void)
|
|
|
|
+{
|
|
|
|
+ return false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline bool lru_gen_add_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
|
|
|
|
+{
|
|
|
|
+ return false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline bool lru_gen_del_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
|
|
|
|
+{
|
|
|
|
+ return false;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#endif /* CONFIG_LRU_GEN */
|
|
|
|
+
|
|
|
|
static __always_inline void add_page_to_lru_list(struct page *page,
|
|
|
|
struct lruvec *lruvec)
|
|
|
|
{
|
|
|
|
enum lru_list lru = page_lru(page);
|
|
|
|
|
|
|
|
+ if (lru_gen_add_page(page, lruvec, false))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
|
|
|
|
list_add(&page->lru, &lruvec->lists[lru]);
|
|
|
|
}
|
|
|
|
@@ -93,6 +269,9 @@ static __always_inline void add_page_to_
|
|
|
|
{
|
|
|
|
enum lru_list lru = page_lru(page);
|
|
|
|
|
|
|
|
+ if (lru_gen_add_page(page, lruvec, true))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
|
|
|
|
list_add_tail(&page->lru, &lruvec->lists[lru]);
|
|
|
|
}
|
|
|
|
@@ -100,6 +279,9 @@ static __always_inline void add_page_to_
|
|
|
|
static __always_inline void del_page_from_lru_list(struct page *page,
|
|
|
|
struct lruvec *lruvec)
|
|
|
|
{
|
|
|
|
+ if (lru_gen_del_page(page, lruvec, false))
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
list_del(&page->lru);
|
|
|
|
update_lru_size(lruvec, page_lru(page), page_zonenum(page),
|
|
|
|
-thp_nr_pages(page));
|
|
|
|
--- a/include/linux/mmzone.h
|
|
|
|
+++ b/include/linux/mmzone.h
|
|
|
|
@@ -294,6 +294,72 @@ enum lruvec_flags {
|
|
|
|
*/
|
|
|
|
};
|
|
|
|
|
|
|
|
+struct lruvec;
|
|
|
|
+
|
|
|
|
+#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
|
|
|
|
+#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * For each lruvec, evictable pages are divided into multiple generations. The
|
|
|
|
+ * youngest and the oldest generation numbers, AKA max_seq and min_seq, are
|
|
|
|
+ * monotonically increasing. The sliding window technique is used to track at
|
|
|
|
+ * least MIN_NR_GENS and at most MAX_NR_GENS generations. An offset within the
|
|
|
|
+ * window, AKA gen, indexes an array of per-type and per-zone lists for the
|
|
|
|
+ * corresponding generation. The counter in page->flags stores gen+1 while a
|
|
|
|
+ * page is on one of the multigenerational lru lists. Otherwise, it stores 0.
|
|
|
|
+ *
|
|
|
|
+ * After a page is faulted in, the aging must check the accessed bit at least
|
|
|
|
+ * twice before the eviction would consider it. The first check clears the
|
|
|
|
+ * accessed bit set during the initial fault. The second check makes sure this
|
|
|
|
+ * page hasn't been used since then.
|
|
|
|
+ */
|
|
|
|
+#define MIN_NR_GENS 2
|
|
|
|
+#define MAX_NR_GENS ((unsigned int)CONFIG_NR_LRU_GENS)
|
|
|
|
+
|
|
|
|
+struct lrugen {
|
|
|
|
+ /* the aging increments the max generation number */
|
|
|
|
+ unsigned long max_seq;
|
|
|
|
+ /* the eviction increments the min generation numbers */
|
|
|
|
+ unsigned long min_seq[ANON_AND_FILE];
|
|
|
|
+ /* the birth time of each generation in jiffies */
|
|
|
|
+ unsigned long timestamps[MAX_NR_GENS];
|
|
|
|
+ /* the multigenerational lru lists */
|
|
|
|
+ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
|
|
|
+ /* the sizes of the multigenerational lru lists in pages */
|
|
|
|
+ unsigned long sizes[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
|
|
|
+ /* whether the multigenerational lru is enabled */
|
|
|
|
+ bool enabled[ANON_AND_FILE];
|
|
|
|
+};
|
|
|
|
+
|
|
|
|
+#define MAX_BATCH_SIZE 8192
|
|
|
|
+
|
|
|
|
+void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec);
|
|
|
|
+void lru_gen_change_state(bool enable, bool main, bool swap);
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_MEMCG
|
|
|
|
+void lru_gen_init_memcg(struct mem_cgroup *memcg);
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+#else /* !CONFIG_LRU_GEN */
|
|
|
|
+
|
|
|
|
+static inline void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static inline void lru_gen_change_state(bool enable, bool main, bool swap)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_MEMCG
|
|
|
|
+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
|
|
|
+{
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+#endif /* CONFIG_LRU_GEN */
|
|
|
|
+
|
|
|
|
struct lruvec {
|
|
|
|
struct list_head lists[NR_LRU_LISTS];
|
|
|
|
/* per lruvec lru_lock for memcg */
|
|
|
|
@@ -311,6 +377,10 @@ struct lruvec {
|
|
|
|
unsigned long refaults[ANON_AND_FILE];
|
|
|
|
/* Various lruvec state flags (enum lruvec_flags) */
|
|
|
|
unsigned long flags;
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+ /* unevictable pages are on LRU_UNEVICTABLE */
|
|
|
|
+ struct lrugen evictable;
|
|
|
|
+#endif
|
|
|
|
#ifdef CONFIG_MEMCG
|
|
|
|
struct pglist_data *pgdat;
|
|
|
|
#endif
|
|
|
|
--- a/include/linux/page-flags-layout.h
|
|
|
|
+++ b/include/linux/page-flags-layout.h
|
|
|
|
@@ -26,6 +26,14 @@
|
|
|
|
|
|
|
|
#define ZONES_WIDTH ZONES_SHIFT
|
|
|
|
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+/* LRU_GEN_WIDTH is generated by kernel/bounds.c as order_base_2(CONFIG_NR_LRU_GENS + 1). */
|
|
|
|
+#define LRU_REFS_WIDTH (CONFIG_TIERS_PER_GEN - 2)
|
|
|
|
+#else
|
|
|
|
+#define LRU_GEN_WIDTH 0
|
|
|
|
+#define LRU_REFS_WIDTH 0
|
|
|
|
+#endif /* CONFIG_LRU_GEN */
|
|
|
|
+
|
|
|
|
#ifdef CONFIG_SPARSEMEM
|
|
|
|
#include <asm/sparsemem.h>
|
|
|
|
#define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
|
|
|
|
@@ -55,7 +63,8 @@
|
|
|
|
#define SECTIONS_WIDTH 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
|
|
|
|
+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
|
|
|
|
+ <= BITS_PER_LONG - NR_PAGEFLAGS
|
|
|
|
#define NODES_WIDTH NODES_SHIFT
|
|
|
|
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
|
|
|
|
#error "Vmemmap: No space for nodes field in page flags"
|
|
|
|
@@ -89,8 +98,8 @@
|
|
|
|
#define LAST_CPUPID_SHIFT 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
|
|
|
|
- <= BITS_PER_LONG - NR_PAGEFLAGS
|
|
|
|
+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
|
|
|
|
+ KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
|
|
|
|
#define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
|
|
|
|
#else
|
|
|
|
#define LAST_CPUPID_WIDTH 0
|
|
|
|
@@ -100,8 +109,8 @@
|
|
|
|
#define LAST_CPUPID_NOT_IN_PAGE_FLAGS
|
|
|
|
#endif
|
|
|
|
|
|
|
|
-#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
|
|
|
|
- > BITS_PER_LONG - NR_PAGEFLAGS
|
|
|
|
+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
|
|
|
|
+ KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
|
|
|
|
#error "Not enough bits in page flags"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
--- a/include/linux/page-flags.h
|
|
|
|
+++ b/include/linux/page-flags.h
|
|
|
|
@@ -845,7 +845,7 @@ static inline void ClearPageSlabPfmemall
|
|
|
|
1UL << PG_private | 1UL << PG_private_2 | \
|
|
|
|
1UL << PG_writeback | 1UL << PG_reserved | \
|
|
|
|
1UL << PG_slab | 1UL << PG_active | \
|
|
|
|
- 1UL << PG_unevictable | __PG_MLOCKED)
|
|
|
|
+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Flags checked when a page is prepped for return by the page allocator.
|
|
|
|
@@ -856,7 +856,7 @@ static inline void ClearPageSlabPfmemall
|
|
|
|
* alloc-free cycle to prevent from reusing the page.
|
|
|
|
*/
|
|
|
|
#define PAGE_FLAGS_CHECK_AT_PREP \
|
|
|
|
- (PAGEFLAGS_MASK & ~__PG_HWPOISON)
|
|
|
|
+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
|
|
|
|
|
|
|
|
#define PAGE_FLAGS_PRIVATE \
|
|
|
|
(1UL << PG_private | 1UL << PG_private_2)
|
|
|
|
--- a/include/linux/sched.h
|
|
|
|
+++ b/include/linux/sched.h
|
|
|
|
@@ -911,6 +911,9 @@ struct task_struct {
|
|
|
|
#ifdef CONFIG_MEMCG
|
|
|
|
unsigned in_user_fault:1;
|
|
|
|
#endif
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+ unsigned in_nonseq_fault:1;
|
|
|
|
+#endif
|
|
|
|
#ifdef CONFIG_COMPAT_BRK
|
|
|
|
unsigned brk_randomized:1;
|
|
|
|
#endif
|
|
|
|
--- a/kernel/bounds.c
|
|
|
|
+++ b/kernel/bounds.c
|
|
|
|
@@ -22,6 +22,9 @@ int main(void)
|
|
|
|
DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
|
|
|
|
#endif
|
|
|
|
DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+ DEFINE(LRU_GEN_WIDTH, order_base_2(CONFIG_NR_LRU_GENS + 1));
|
|
|
|
+#endif
|
|
|
|
/* End of constants */
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
--- a/kernel/cgroup/cgroup-internal.h
|
|
|
|
+++ b/kernel/cgroup/cgroup-internal.h
|
|
|
|
@@ -165,7 +165,6 @@ struct cgroup_mgctx {
|
|
|
|
#define DEFINE_CGROUP_MGCTX(name) \
|
|
|
|
struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
|
|
|
|
|
|
|
|
-extern struct mutex cgroup_mutex;
|
|
|
|
extern spinlock_t css_set_lock;
|
|
|
|
extern struct cgroup_subsys *cgroup_subsys[];
|
|
|
|
extern struct list_head cgroup_roots;
|
|
|
|
--- a/mm/huge_memory.c
|
|
|
|
+++ b/mm/huge_memory.c
|
|
|
|
@@ -2364,7 +2364,8 @@ static void __split_huge_page_tail(struc
|
|
|
|
#ifdef CONFIG_64BIT
|
|
|
|
(1L << PG_arch_2) |
|
|
|
|
#endif
|
|
|
|
- (1L << PG_dirty)));
|
|
|
|
+ (1L << PG_dirty) |
|
|
|
|
+ LRU_GEN_MASK | LRU_REFS_MASK));
|
|
|
|
|
|
|
|
/* ->mapping in first tail page is compound_mapcount */
|
|
|
|
VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
|
|
|
|
--- a/mm/memcontrol.c
|
|
|
|
+++ b/mm/memcontrol.c
|
2022-12-14 10:11:04 -05:00
|
|
|
@@ -5237,6 +5237,7 @@ static struct mem_cgroup *mem_cgroup_all
|
2022-03-29 17:10:48 +01:00
|
|
|
memcg->deferred_split_queue.split_queue_len = 0;
|
|
|
|
#endif
|
|
|
|
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
|
|
|
|
+ lru_gen_init_memcg(memcg);
|
|
|
|
return memcg;
|
|
|
|
fail:
|
|
|
|
mem_cgroup_id_remove(memcg);
|
|
|
|
--- a/mm/memory.c
|
|
|
|
+++ b/mm/memory.c
|
|
|
|
@@ -4788,6 +4788,7 @@ vm_fault_t handle_mm_fault(struct vm_are
|
|
|
|
unsigned int flags, struct pt_regs *regs)
|
|
|
|
{
|
|
|
|
vm_fault_t ret;
|
|
|
|
+ bool nonseq_fault = !(vma->vm_flags & VM_SEQ_READ);
|
|
|
|
|
|
|
|
__set_current_state(TASK_RUNNING);
|
|
|
|
|
|
|
|
@@ -4809,11 +4810,17 @@ vm_fault_t handle_mm_fault(struct vm_are
|
|
|
|
if (flags & FAULT_FLAG_USER)
|
|
|
|
mem_cgroup_enter_user_fault();
|
|
|
|
|
|
|
|
+ if (nonseq_fault)
|
|
|
|
+ task_enter_nonseq_fault();
|
|
|
|
+
|
|
|
|
if (unlikely(is_vm_hugetlb_page(vma)))
|
|
|
|
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
|
|
|
|
else
|
|
|
|
ret = __handle_mm_fault(vma, address, flags);
|
|
|
|
|
|
|
|
+ if (nonseq_fault)
|
|
|
|
+ task_exit_nonseq_fault();
|
|
|
|
+
|
|
|
|
if (flags & FAULT_FLAG_USER) {
|
|
|
|
mem_cgroup_exit_user_fault();
|
|
|
|
/*
|
|
|
|
--- a/mm/mm_init.c
|
|
|
|
+++ b/mm/mm_init.c
|
|
|
|
@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layo
|
|
|
|
|
|
|
|
shift = 8 * sizeof(unsigned long);
|
|
|
|
width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
|
|
|
|
- - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
|
|
|
|
+ - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
|
|
|
|
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
|
|
|
|
- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
|
|
|
|
+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
|
|
|
|
SECTIONS_WIDTH,
|
|
|
|
NODES_WIDTH,
|
|
|
|
ZONES_WIDTH,
|
|
|
|
LAST_CPUPID_WIDTH,
|
|
|
|
KASAN_TAG_WIDTH,
|
|
|
|
+ LRU_GEN_WIDTH,
|
|
|
|
+ LRU_REFS_WIDTH,
|
|
|
|
NR_PAGEFLAGS);
|
|
|
|
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
|
|
|
|
"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
|
|
|
|
--- a/mm/page_alloc.c
|
|
|
|
+++ b/mm/page_alloc.c
|
2023-02-15 13:15:38 -05:00
|
|
|
@@ -7459,6 +7459,7 @@ static void __meminit pgdat_init_interna
|
2022-03-29 17:10:48 +01:00
|
|
|
|
|
|
|
pgdat_page_ext_init(pgdat);
|
|
|
|
lruvec_init(&pgdat->__lruvec);
|
|
|
|
+ lru_gen_init_state(NULL, &pgdat->__lruvec);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
|
|
|
|
--- a/mm/swap.c
|
|
|
|
+++ b/mm/swap.c
|
|
|
|
@@ -446,6 +446,11 @@ void lru_cache_add(struct page *page)
|
|
|
|
VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
|
|
|
|
VM_BUG_ON_PAGE(PageLRU(page), page);
|
|
|
|
|
|
|
|
+ /* see the comment in lru_gen_add_page() */
|
|
|
|
+ if (lru_gen_enabled() && !PageUnevictable(page) &&
|
|
|
|
+ task_in_nonseq_fault() && !(current->flags & PF_MEMALLOC))
|
|
|
|
+ SetPageActive(page);
|
|
|
|
+
|
|
|
|
get_page(page);
|
|
|
|
local_lock(&lru_pvecs.lock);
|
|
|
|
pvec = this_cpu_ptr(&lru_pvecs.lru_add);
|
|
|
|
@@ -547,7 +552,7 @@ static void lru_deactivate_file_fn(struc
|
|
|
|
|
|
|
|
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
|
|
|
|
{
|
|
|
|
- if (PageActive(page) && !PageUnevictable(page)) {
|
|
|
|
+ if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
|
|
|
|
int nr_pages = thp_nr_pages(page);
|
|
|
|
|
|
|
|
del_page_from_lru_list(page, lruvec);
|
|
|
|
@@ -661,7 +666,7 @@ void deactivate_file_page(struct page *p
|
|
|
|
*/
|
|
|
|
void deactivate_page(struct page *page)
|
|
|
|
{
|
|
|
|
- if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
|
|
|
|
+ if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
|
|
|
|
struct pagevec *pvec;
|
|
|
|
|
|
|
|
local_lock(&lru_pvecs.lock);
|
|
|
|
--- a/mm/swapfile.c
|
|
|
|
+++ b/mm/swapfile.c
|
2023-02-09 07:45:03 -05:00
|
|
|
@@ -2689,6 +2689,7 @@ SYSCALL_DEFINE1(swapoff, const char __us
|
2022-03-29 17:10:48 +01:00
|
|
|
err = 0;
|
|
|
|
atomic_inc(&proc_poll_event);
|
|
|
|
wake_up_interruptible(&proc_poll_wait);
|
|
|
|
+ lru_gen_change_state(false, false, true);
|
|
|
|
|
|
|
|
out_dput:
|
|
|
|
filp_close(victim, NULL);
|
2023-02-09 07:45:03 -05:00
|
|
|
@@ -3350,6 +3351,7 @@ SYSCALL_DEFINE2(swapon, const char __use
|
2022-03-29 17:10:48 +01:00
|
|
|
mutex_unlock(&swapon_mutex);
|
|
|
|
atomic_inc(&proc_poll_event);
|
|
|
|
wake_up_interruptible(&proc_poll_wait);
|
|
|
|
+ lru_gen_change_state(true, false, true);
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
goto out;
|
|
|
|
--- a/mm/vmscan.c
|
|
|
|
+++ b/mm/vmscan.c
|
|
|
|
@@ -50,6 +50,7 @@
|
|
|
|
#include <linux/printk.h>
|
|
|
|
#include <linux/dax.h>
|
|
|
|
#include <linux/psi.h>
|
|
|
|
+#include <linux/memory.h>
|
|
|
|
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include <asm/div64.h>
|
2022-12-08 08:32:44 -05:00
|
|
|
@@ -2815,6 +2816,273 @@ static bool can_age_anon_pages(struct pg
|
2022-03-29 17:10:48 +01:00
|
|
|
return can_demote(pgdat->node_id, sc);
|
|
|
|
}
|
|
|
|
|
|
|
|
+#ifdef CONFIG_LRU_GEN
|
|
|
|
+
|
|
|
|
+/******************************************************************************
|
|
|
|
+ * shorthand helpers
|
|
|
|
+ ******************************************************************************/
|
|
|
|
+
|
|
|
|
+#define for_each_gen_type_zone(gen, type, zone) \
|
|
|
|
+ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
|
|
|
|
+ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
|
|
|
|
+ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
|
|
|
|
+
|
|
|
|
+static int page_lru_gen(struct page *page)
|
|
|
|
+{
|
|
|
|
+ unsigned long flags = READ_ONCE(page->flags);
|
|
|
|
+
|
|
|
|
+ return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static struct lruvec *get_lruvec(int nid, struct mem_cgroup *memcg)
|
|
|
|
+{
|
|
|
|
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_MEMCG
|
|
|
|
+ if (memcg) {
|
|
|
|
+ struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
|
|
|
|
+
|
|
|
|
+ if (lruvec->pgdat != pgdat)
|
|
|
|
+ lruvec->pgdat = pgdat;
|
|
|
|
+
|
|
|
|
+ return lruvec;
|
|
|
|
+ }
|
|
|
|
+#endif
|
|
|
|
+ return pgdat ? &pgdat->__lruvec : NULL;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static int get_nr_gens(struct lruvec *lruvec, int type)
|
|
|
|
+{
|
|
|
|
+ return lruvec->evictable.max_seq - lruvec->evictable.min_seq[type] + 1;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
|
|
|
|
+{
|
|
|
|
+ return get_nr_gens(lruvec, 1) >= MIN_NR_GENS &&
|
|
|
|
+ get_nr_gens(lruvec, 1) <= get_nr_gens(lruvec, 0) &&
|
|
|
|
+ get_nr_gens(lruvec, 0) <= MAX_NR_GENS;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/******************************************************************************
|
|
|
|
+ * state change
|
|
|
|
+ ******************************************************************************/
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_LRU_GEN_ENABLED
|
|
|
|
+DEFINE_STATIC_KEY_TRUE(lru_gen_static_key);
|
|
|
|
+#else
|
|
|
|
+DEFINE_STATIC_KEY_FALSE(lru_gen_static_key);
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+static int lru_gen_nr_swapfiles;
|
|
|
|
+
|
|
|
|
+static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
|
|
|
|
+{
|
|
|
|
+ int gen, type, zone;
|
|
|
|
+ enum lru_list lru;
|
|
|
|
+ struct lrugen *lrugen = &lruvec->evictable;
|
|
|
|
+
|
|
|
|
+ for_each_evictable_lru(lru) {
|
|
|
|
+ type = is_file_lru(lru);
|
|
|
|
+
|
|
|
|
+ if (lrugen->enabled[type] && !list_empty(&lruvec->lists[lru]))
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for_each_gen_type_zone(gen, type, zone) {
|
|
|
|
+ if (!lrugen->enabled[type] && !list_empty(&lrugen->lists[gen][type][zone]))
|
|
|
|
+ return false;
|
|
|
|
+
|
|
|
|
+ /* unlikely but not a bug when reset_batch_size() is pending */
|
|
|
|
+ VM_WARN_ON(!lrugen->enabled[type] && lrugen->sizes[gen][type][zone]);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static bool fill_lists(struct lruvec *lruvec)
|
|
|
|
+{
|
|
|
|
+ enum lru_list lru;
|
|
|
|
+ int remaining = MAX_BATCH_SIZE;
|
|
|
|
+
|
|
|
|
+ for_each_evictable_lru(lru) {
|
|
|
|
+ int type = is_file_lru(lru);
|
|
|
|
+ bool active = is_active_lru(lru);
|
|
|
|
+ struct list_head *head = &lruvec->lists[lru];
|
|
|
|
+
|
|
|
|
+ if (!lruvec->evictable.enabled[type])
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ while (!list_empty(head)) {
|
|
|
|
+ bool success;
|
|
|
|
+ struct page *page = lru_to_page(head);
|
|
|
|
+
|
|
|
|
+ VM_BUG_ON_PAGE(PageTail(page), page);
|
|
|
|
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
|
|
|
|
+ VM_BUG_ON_PAGE(PageActive(page) != active, page);
|
|
|
|
+ VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
|
|
|
|
+ VM_BUG_ON_PAGE(page_lru_gen(page) < MAX_NR_GENS, page);
|
|
|
|
+
|
|
|
|
+ prefetchw_prev_lru_page(page, head, flags);
|
|
|
|
+
|
|
|
|
+ del_page_from_lru_list(page, lruvec);
|
|
|
|
+ success = lru_gen_add_page(page, lruvec, false);
|
|
|
|
+ VM_BUG_ON(!success);
|
|
|
|
+
|
|
|
|
+ if (!--remaining)
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static bool drain_lists(struct lruvec *lruvec)
|
|
|
|
+{
|
|
|
|
+ int gen, type, zone;
|
|
|
|
+ int remaining = MAX_BATCH_SIZE;
|
|
|
|
+
|
|
|
|
+ for_each_gen_type_zone(gen, type, zone) {
|
|
|
|
+ struct list_head *head = &lruvec->evictable.lists[gen][type][zone];
|
|
|
|
+
|
|
|
|
+ if (lruvec->evictable.enabled[type])
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ while (!list_empty(head)) {
|
|
|
|
+ bool success;
|
|
|
|
+ struct page *page = lru_to_page(head);
|
|
|
|
+
|
|
|
|
+ VM_BUG_ON_PAGE(PageTail(page), page);
|
|
|
|
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
|
|
|
|
+ VM_BUG_ON_PAGE(PageActive(page), page);
|
|
|
|
+ VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
|
|
|
|
+ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
|
|
|
|
+
|
|
|
|
+ prefetchw_prev_lru_page(page, head, flags);
|
|
|
|
+
|
|
|
|
+ success = lru_gen_del_page(page, lruvec, false);
|
|
|
|
+ VM_BUG_ON(!success);
|
|
|
|
+ add_page_to_lru_list(page, lruvec);
|
|
|
|
+
|
|
|
|
+ if (!--remaining)
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return true;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * For file page tracking, we enable/disable it according to the main switch.
|
|
|
|
+ * For anon page tracking, we only enable it when the main switch is on and
|
|
|
|
+ * there is at least one swapfile; we disable it when there are no swapfiles
|
|
|
|
+ * regardless of the value of the main switch. Otherwise, we will eventually
|
|
|
|
+ * reach the max size of the sliding window and have to call inc_min_seq().
|
|
|
|
+ */
|
|
|
|
+void lru_gen_change_state(bool enable, bool main, bool swap)
|
|
|
|
+{
|
|
|
|
+ static DEFINE_MUTEX(state_mutex);
|
|
|
|
+
|
|
|
|
+ struct mem_cgroup *memcg;
|
|
|
|
+
|
|
|
|
+ mem_hotplug_begin();
|
|
|
|
+ cgroup_lock();
|
|
|
|
+ mutex_lock(&state_mutex);
|
|
|
|
+
|
|
|
|
+ if (swap) {
|
|
|
|
+ if (enable)
|
|
|
|
+ swap = !lru_gen_nr_swapfiles++;
|
|
|
|
+ else
|
|
|
|
+ swap = !--lru_gen_nr_swapfiles;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (main && enable != lru_gen_enabled()) {
|
|
|
|
+ if (enable)
|
|
|
|
+ static_branch_enable(&lru_gen_static_key);
|
|
|
|
+ else
|
|
|
|
+ static_branch_disable(&lru_gen_static_key);
|
|
|
|
+ } else if (!swap || !lru_gen_enabled())
|
|
|
|
+ goto unlock;
|
|
|
|
+
|
|
|
|
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
|
|
|
+ do {
|
|
|
|
+ int nid;
|
|
|
|
+
|
|
|
|
+ for_each_node(nid) {
|
|
|
|
+ struct lruvec *lruvec = get_lruvec(nid, memcg);
|
|
|
|
+
|
|
|
|
+ if (!lruvec)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ spin_lock_irq(&lruvec->lru_lock);
|
|
|
|
+
|
|
|
|
+ VM_BUG_ON(!seq_is_valid(lruvec));
|
|
|
|
+ VM_BUG_ON(!state_is_valid(lruvec));
|
|
|
|
+
|
|
|
|
+ lruvec->evictable.enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles;
|
|
|
|
+ lruvec->evictable.enabled[1] = lru_gen_enabled();
|
|
|
|
+
|
|
|
|
+ while (!(enable ? fill_lists(lruvec) : drain_lists(lruvec))) {
|
|
|
|
+ spin_unlock_irq(&lruvec->lru_lock);
|
|
|
|
+ cond_resched();
|
|
|
|
+ spin_lock_irq(&lruvec->lru_lock);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ spin_unlock_irq(&lruvec->lru_lock);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ cond_resched();
|
|
|
|
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
|
|
|
+unlock:
|
|
|
|
+ mutex_unlock(&state_mutex);
|
|
|
|
+ cgroup_unlock();
|
|
|
|
+ mem_hotplug_done();
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/******************************************************************************
|
|
|
|
+ * initialization
|
|
|
|
+ ******************************************************************************/
|
|
|
|
+
|
|
|
|
+void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec)
|
|
|
|
+{
|
|
|
|
+ int i;
|
|
|
|
+ int gen, type, zone;
|
|
|
|
+ struct lrugen *lrugen = &lruvec->evictable;
|
|
|
|
+
|
|
|
|
+ lrugen->max_seq = MIN_NR_GENS + 1;
|
|
|
|
+ lrugen->enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles;
|
|
|
|
+ lrugen->enabled[1] = lru_gen_enabled();
|
|
|
|
+
|
|
|
|
+ for (i = 0; i <= MIN_NR_GENS + 1; i++)
|
|
|
|
+ lrugen->timestamps[i] = jiffies;
|
|
|
|
+
|
|
|
|
+ for_each_gen_type_zone(gen, type, zone)
|
|
|
|
+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#ifdef CONFIG_MEMCG
|
|
|
|
+void lru_gen_init_memcg(struct mem_cgroup *memcg)
|
|
|
|
+{
|
|
|
|
+ int nid;
|
|
|
|
+
|
|
|
|
+ for_each_node(nid) {
|
|
|
|
+ struct lruvec *lruvec = get_lruvec(nid, memcg);
|
|
|
|
+
|
|
|
|
+ lru_gen_init_state(memcg, lruvec);
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+static int __init init_lru_gen(void)
|
|
|
|
+{
|
|
|
|
+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
|
|
|
|
+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+};
|
|
|
|
+late_initcall(init_lru_gen);
|
|
|
|
+
|
|
|
|
+#endif /* CONFIG_LRU_GEN */
|
|
|
|
+
|
|
|
|
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
|
|
|
{
|
|
|
|
unsigned long nr[NR_LRU_LISTS];
|