- From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001
- From: Yu Zhao <[email protected]>
- Date: Wed, 21 Dec 2022 21:19:02 -0700
- Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard
- Recall that the aging produces the youngest generation: first it scans
- for accessed pages and updates their gen counters; then it increments
- lrugen->max_seq.
- The current aging fairness safeguard for kswapd uses two passes to
- ensure fairness to multiple eligible memcgs. On the first pass,
- which is shared with the eviction, it checks whether all eligible
- memcgs are low on cold pages. If so, it requires a second pass, on
- which it ages all those memcgs at the same time.
- With memcg LRU, the aging, while ensuring eventual fairness, will run
- when necessary. Therefore the current aging fairness safeguard for
- kswapd will not be needed.
- Note that memcg LRU only applies to global reclaim. For memcg reclaim,
- the aging can be unfair to different memcgs, i.e., their
- lrugen->max_seq can be incremented at different paces.
- Link: https://lkml.kernel.org/r/[email protected]
- Signed-off-by: Yu Zhao <[email protected]>
- Cc: Johannes Weiner <[email protected]>
- Cc: Jonathan Corbet <[email protected]>
- Cc: Michael Larabel <[email protected]>
- Cc: Michal Hocko <[email protected]>
- Cc: Mike Rapoport <[email protected]>
- Cc: Roman Gushchin <[email protected]>
- Cc: Suren Baghdasaryan <[email protected]>
- Signed-off-by: Andrew Morton <[email protected]>
- ---
- mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
- 1 file changed, 59 insertions(+), 67 deletions(-)
- --- a/mm/vmscan.c
- +++ b/mm/vmscan.c
- @@ -131,7 +131,6 @@ struct scan_control {
-
- #ifdef CONFIG_LRU_GEN
- /* help kswapd make better choices among multiple memcgs */
- - unsigned int memcgs_need_aging:1;
- unsigned long last_reclaimed;
- #endif
-
- @@ -4184,7 +4183,7 @@ done:
- return true;
- }
-
- -static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
- +static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
- struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
- {
- int gen, type, zone;
- @@ -4193,6 +4192,13 @@ static bool should_run_aging(struct lruv
- unsigned long total = 0;
- struct lru_gen_page *lrugen = &lruvec->lrugen;
- struct mem_cgroup *memcg = lruvec_memcg(lruvec);
- + DEFINE_MIN_SEQ(lruvec);
- +
- + /* whether this lruvec is completely out of cold pages */
- + if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
- + *nr_to_scan = 0;
- + return true;
- + }
-
- for (type = !can_swap; type < ANON_AND_FILE; type++) {
- unsigned long seq;
- @@ -4221,8 +4227,6 @@ static bool should_run_aging(struct lruv
- * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
- * ideal number of generations is MIN_NR_GENS+1.
- */
- - if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
- - return true;
- if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
- return false;
-
- @@ -4241,40 +4245,54 @@ static bool should_run_aging(struct lruv
- return false;
- }
-
- -static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
- +static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
- {
- - bool need_aging;
- - unsigned long nr_to_scan;
- - int swappiness = get_swappiness(lruvec, sc);
- + int gen, type, zone;
- + unsigned long total = 0;
- + bool can_swap = get_swappiness(lruvec, sc);
- + struct lru_gen_page *lrugen = &lruvec->lrugen;
- struct mem_cgroup *memcg = lruvec_memcg(lruvec);
- DEFINE_MAX_SEQ(lruvec);
- DEFINE_MIN_SEQ(lruvec);
-
- - VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
- + for (type = !can_swap; type < ANON_AND_FILE; type++) {
- + unsigned long seq;
-
- - mem_cgroup_calculate_protection(NULL, memcg);
- + for (seq = min_seq[type]; seq <= max_seq; seq++) {
- + gen = lru_gen_from_seq(seq);
-
- - if (mem_cgroup_below_min(memcg))
- - return false;
- + for (zone = 0; zone < MAX_NR_ZONES; zone++)
- + total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
- + }
- + }
-
- - need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
- + /* whether the size is big enough to be helpful */
- + return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
- +}
-
- - if (min_ttl) {
- - int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
- - unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
- +static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
- + unsigned long min_ttl)
- +{
- + int gen;
- + unsigned long birth;
- + struct mem_cgroup *memcg = lruvec_memcg(lruvec);
- + DEFINE_MIN_SEQ(lruvec);
-
- - if (time_is_after_jiffies(birth + min_ttl))
- - return false;
- + VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
-
- - /* the size is likely too small to be helpful */
- - if (!nr_to_scan && sc->priority != DEF_PRIORITY)
- - return false;
- - }
- + /* see the comment on lru_gen_page */
- + gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
- + birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
-
- - if (need_aging)
- - try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
- + if (time_is_after_jiffies(birth + min_ttl))
- + return false;
-
- - return true;
- + if (!lruvec_is_sizable(lruvec, sc))
- + return false;
- +
- + mem_cgroup_calculate_protection(NULL, memcg);
- +
- + return !mem_cgroup_below_min(memcg);
- }
-
- /* to protect the working set of the last N jiffies */
- @@ -4283,46 +4301,32 @@ static unsigned long lru_gen_min_ttl __r
- static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
- {
- struct mem_cgroup *memcg;
- - bool success = false;
- unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
-
- VM_WARN_ON_ONCE(!current_is_kswapd());
-
- sc->last_reclaimed = sc->nr_reclaimed;
-
- - /*
- - * To reduce the chance of going into the aging path, which can be
- - * costly, optimistically skip it if the flag below was cleared in the
- - * eviction path. This improves the overall performance when multiple
- - * memcgs are available.
- - */
- - if (!sc->memcgs_need_aging) {
- - sc->memcgs_need_aging = true;
- + /* check the order to exclude compaction-induced reclaim */
- + if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
- return;
- - }
- -
- - set_mm_walk(pgdat);
-
- memcg = mem_cgroup_iter(NULL, NULL, NULL);
- do {
- struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
-
- - if (age_lruvec(lruvec, sc, min_ttl))
- - success = true;
- + if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
- + mem_cgroup_iter_break(NULL, memcg);
- + return;
- + }
-
- cond_resched();
- } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
-
- - clear_mm_walk();
- -
- - /* check the order to exclude compaction-induced reclaim */
- - if (success || !min_ttl || sc->order)
- - return;
- -
- /*
- * The main goal is to OOM kill if every generation from all memcgs is
- * younger than min_ttl. However, another possibility is all memcgs are
- - * either below min or empty.
- + * either too small or below min.
- */
- if (mutex_trylock(&oom_lock)) {
- struct oom_control oc = {
- @@ -4830,33 +4834,27 @@ retry:
- * reclaim.
- */
- static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
- - bool can_swap, bool *need_aging)
- + bool can_swap)
- {
- unsigned long nr_to_scan;
- struct mem_cgroup *memcg = lruvec_memcg(lruvec);
- DEFINE_MAX_SEQ(lruvec);
- - DEFINE_MIN_SEQ(lruvec);
-
- if (mem_cgroup_below_min(memcg) ||
- (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
- return 0;
-
- - *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
- - if (!*need_aging)
- + if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
- return nr_to_scan;
-
- /* skip the aging path at the default priority */
- if (sc->priority == DEF_PRIORITY)
- - goto done;
- + return nr_to_scan;
-
- - /* leave the work to lru_gen_age_node() */
- - if (current_is_kswapd())
- - return 0;
- + try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
-
- - if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
- - return nr_to_scan;
- -done:
- - return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
- + /* skip this lruvec as it's low on cold pages */
- + return 0;
- }
-
- static unsigned long get_nr_to_reclaim(struct scan_control *sc)
- @@ -4875,9 +4873,7 @@ static unsigned long get_nr_to_reclaim(s
- static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
- {
- struct blk_plug plug;
- - bool need_aging = false;
- unsigned long scanned = 0;
- - unsigned long reclaimed = sc->nr_reclaimed;
- unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
-
- lru_add_drain();
- @@ -4898,13 +4894,13 @@ static void lru_gen_shrink_lruvec(struct
- else
- swappiness = 0;
-
- - nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
- + nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
- if (!nr_to_scan)
- - goto done;
- + break;
-
- delta = evict_pages(lruvec, sc, swappiness);
- if (!delta)
- - goto done;
- + break;
-
- scanned += delta;
- if (scanned >= nr_to_scan)
- @@ -4916,10 +4912,6 @@ static void lru_gen_shrink_lruvec(struct
- cond_resched();
- }
-
- - /* see the comment in lru_gen_age_node() */
- - if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
- - sc->memcgs_need_aging = false;
- -done:
- clear_mm_walk();
-
- blk_finish_plug(&plug);
|