Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 132
1 file changed, 91 insertions, 41 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5e8eadd71bac..37e90db1520b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -754,13 +754,17 @@ enum page_references {
 };
 
 static enum page_references page_check_references(struct page *page,
-						  struct scan_control *sc)
+						  struct scan_control *sc,
+						  bool *freeable)
 {
 	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
+	int pte_dirty;
+
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
 
 	referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
-					  &vm_flags);
+					  &vm_flags, &pte_dirty);
 	referenced_page = TestClearPageReferenced(page);
 
 	/*
@@ -801,6 +805,10 @@ static enum page_references page_check_references(struct page *page,
 		return PAGEREF_KEEP;
 	}
 
+	if (PageAnon(page) && !pte_dirty && !PageSwapCache(page) &&
+	    !PageDirty(page))
+		*freeable = true;
+
 	/* Reclaim if clean, defer dirty pages to writeback */
 	if (referenced_page && !PageSwapBacked(page))
 		return PAGEREF_RECLAIM_CLEAN;
@@ -869,6 +877,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		int may_enter_fs;
 		enum page_references references = PAGEREF_RECLAIM_CLEAN;
 		bool dirty, writeback;
+		bool freeable = false;
 
 		cond_resched();
 
@@ -992,7 +1001,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (!force_reclaim)
-			references = page_check_references(page, sc);
+			references = page_check_references(page, sc,
+							&freeable);
 
 		switch (references) {
 		case PAGEREF_ACTIVATE:
@@ -1009,22 +1019,31 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * Try to allocate it some swap space here.
 		 */
 		if (PageAnon(page) && !PageSwapCache(page)) {
-			if (!(sc->gfp_mask & __GFP_IO))
-				goto keep_locked;
-			if (!add_to_swap(page, page_list))
-				goto activate_locked;
-			may_enter_fs = 1;
-
-			/* Adding to swap updated mapping */
-			mapping = page_mapping(page);
+			if (!freeable) {
+				if (!(sc->gfp_mask & __GFP_IO))
+					goto keep_locked;
+				if (!add_to_swap(page, page_list))
+					goto activate_locked;
+				may_enter_fs = 1;
+				/* Adding to swap updated mapping */
+				mapping = page_mapping(page);
+			} else {
+				if (likely(!PageTransHuge(page)))
+					goto unmap;
+				/* try_to_unmap isn't aware of THP page */
+				if (unlikely(split_huge_page_to_list(page,
+								page_list)))
+					goto keep_locked;
+			}
 		}
-
+unmap:
 		/*
 		 * The page is mapped into the page tables of one or more
 		 * processes. Try to unmap it here.
 		 */
-		if (page_mapped(page) && mapping) {
-			switch (try_to_unmap(page, ttu_flags)) {
+		if (page_mapped(page) && (mapping || freeable)) {
+			switch (try_to_unmap(page,
+					freeable ? TTU_FREE : ttu_flags)) {
 			case SWAP_FAIL:
 				goto activate_locked;
 			case SWAP_AGAIN:
@@ -1032,7 +1051,20 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			case SWAP_MLOCK:
 				goto cull_mlocked;
 			case SWAP_SUCCESS:
-				; /* try to free the page below */
+				/* try to free the page below */
+				if (!freeable)
+					break;
+				/*
+				 * Freeable anon page doesn't have mapping
+				 * due to skipping of swapcache so we free
+				 * page in here rather than __remove_mapping.
+				 */
+				VM_BUG_ON_PAGE(PageSwapCache(page), page);
+				if (!page_freeze_refs(page, 1))
+					goto keep_locked;
+				__ClearPageLocked(page);
+				count_vm_event(PGLAZYFREED);
+				goto free_it;
 			}
 		}
 
@@ -1142,7 +1174,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 * we obviously don't have to worry about waking up a process
 		 * waiting on the page lock, because there are no references.
 		 */
-		__clear_page_locked(page);
+		__ClearPageLocked(page);
 free_it:
 		nr_reclaimed++;
 
@@ -1401,6 +1433,32 @@ int isolate_lru_page(struct page *page)
 	return ret;
 }
 
+static int __too_many_isolated(struct zone *zone, int file,
+	struct scan_control *sc, int safe)
+{
+	unsigned long inactive, isolated;
+
+	if (safe) {
+		inactive = zone_page_state_snapshot(zone,
+				NR_INACTIVE_ANON + 2 * file);
+		isolated = zone_page_state_snapshot(zone,
+				NR_ISOLATED_ANON + file);
+	} else {
+		inactive = zone_page_state(zone, NR_INACTIVE_ANON + 2 * file);
+		isolated = zone_page_state(zone, NR_ISOLATED_ANON + file);
+	}
+
+	/*
+	 * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
+	 * won't get blocked by normal direct-reclaimers, forming a circular
+	 * deadlock.
+	 */
+	if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
+		inactive >>= 3;
+
+	return isolated > inactive;
+}
+
 /*
  * A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
  * then get resheduled. When there are massive number of tasks doing page
@@ -1409,33 +1467,24 @@ int isolate_lru_page(struct page *page)
  * unnecessary swapping, thrashing and OOM.
  */
 static int too_many_isolated(struct zone *zone, int file,
-		struct scan_control *sc)
+	struct scan_control *sc)
 {
-	unsigned long inactive, isolated;
-
 	if (current_is_kswapd())
 		return 0;
 
 	if (!global_reclaim(sc))
 		return 0;
 
-	if (file) {
-		inactive = zone_page_state(zone, NR_INACTIVE_FILE);
-		isolated = zone_page_state(zone, NR_ISOLATED_FILE);
-	} else {
-		inactive = zone_page_state(zone, NR_INACTIVE_ANON);
-		isolated = zone_page_state(zone, NR_ISOLATED_ANON);
-	}
-
 	/*
-	 * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
-	 * won't get blocked by normal direct-reclaimers, forming a circular
-	 * deadlock.
+	 * __too_many_isolated(safe=0) is fast but inaccurate, because it
+	 * doesn't account for the vm_stat_diff[] counters. So if it looks
+	 * like too_many_isolated() is about to return true, fall back to the
+	 * slower, more accurate zone_page_state_snapshot().
 	 */
-	if ((sc->gfp_mask & GFP_IOFS) == GFP_IOFS)
-		inactive >>= 3;
+	if (unlikely(__too_many_isolated(zone, file, sc, 0)))
+		return __too_many_isolated(zone, file, sc, 1);
 
-	return isolated > inactive;
+	return 0;
 }
 
 static noinline_for_stack void
@@ -1772,7 +1821,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 		}
 
 		if (page_referenced(page, 0, sc->target_mem_cgroup,
-				    &vm_flags)) {
+				    &vm_flags, NULL)) {
 			nr_rotated += hpage_nr_pages(page);
 			/*
 			 * Identify referenced, file-backed active pages and
@@ -2646,7 +2695,8 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 
 	for (i = 0; i <= ZONE_NORMAL; i++) {
 		zone = &pgdat->node_zones[i];
-		if (!populated_zone(zone))
+		if (!populated_zone(zone) ||
+		    zone_reclaimable_pages(zone) == 0)
 			continue;
 
 		pfmemalloc_reserve += min_wmark_pages(zone);
@@ -3596,7 +3646,7 @@ int zone_reclaim_mode __read_mostly;
 #define RECLAIM_OFF 0
 #define RECLAIM_ZONE (1<<0)	/* Run shrink_inactive_list on the zone */
 #define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
-#define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */
+#define RECLAIM_UNMAP (1<<2)	/* Unmap pages during reclaim */
 
 /*
  * Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -3638,12 +3688,12 @@ static long zone_pagecache_reclaimable(struct zone *zone)
 	long delta = 0;
 
 	/*
-	 * If RECLAIM_SWAP is set, then all file pages are considered
+	 * If RECLAIM_UNMAP is set, then all file pages are considered
 	 * potentially reclaimable. Otherwise, we have to worry about
 	 * pages like swapcache and zone_unmapped_file_pages() provides
 	 * a better estimate
 	 */
-	if (zone_reclaim_mode & RECLAIM_SWAP)
+	if (zone_reclaim_mode & RECLAIM_UNMAP)
 		nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
 	else
 		nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
@@ -3674,15 +3724,15 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.order = order,
 		.priority = ZONE_RECLAIM_PRIORITY,
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
-		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+		.may_unmap = !!(zone_reclaim_mode & RECLAIM_UNMAP),
 		.may_swap = 1,
 	};
 
 	cond_resched();
 	/*
-	 * We need to be able to allocate from the reserves for RECLAIM_SWAP
+	 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
 	 * and we also need to be able to write out pages for RECLAIM_WRITE
-	 * and RECLAIM_SWAP.
+	 * and RECLAIM_UNMAP.
 	 */
 	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
 	lockdep_set_current_reclaim_state(gfp_mask);
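
The freeable/TTU_FREE path added to shrink_page_list() above is the reclaim half of lazy freeing: a clean anonymous page whose PTEs are not dirty is dropped outright (counted as PGLAZYFREED) instead of being routed through swap. Userspace reaches this behaviour through madvise(MADV_FREE) on kernels that carry this work (mainline since 4.5). A minimal usage sketch, assuming such a kernel and a libc that defines MADV_FREE; the buffer size and fill pattern are illustrative only:

/*
 * Demonstrates marking anonymous memory lazily freeable so the
 * shrink_page_list() path above can reclaim it without swap I/O.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 4 << 20;	/* 4 MiB of anonymous memory */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(buf, 0xaa, len);	/* dirty the pages */

	/*
	 * Mark the range disposable: under memory pressure, reclaim may
	 * now free these pages without writing them to swap (accounted
	 * as PGLAZYFREED in /proc/vmstat).
	 */
	if (madvise(buf, len, MADV_FREE) != 0)
		perror("madvise(MADV_FREE)");	/* EINVAL on older kernels */

	munmap(buf, len);
	return 0;
}

A later store to any page in the range re-dirties its PTE, so page_check_references() stops reporting that page as freeable and its new contents survive reclaim.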
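The reworked too_many_isolated() is also a reusable pattern in its own right: read the cheap, possibly stale counter first, and pay for the accurate per-CPU fold only when the cheap read claims trouble. A small userspace sketch of that two-pass idea; the names here (struct counter, counter_read_fast, counter_read_snapshot, over_limit) are hypothetical stand-ins for zone_page_state(), zone_page_state_snapshot() and the vm_stat_diff[] array:

#include <stdio.h>

#define NR_CPUS 4

struct counter {
	long global;            /* cheap-to-read folded value */
	long deltas[NR_CPUS];   /* per-CPU diffs not yet folded in */
};

/* Fast but inaccurate: ignores the unfolded per-CPU deltas. */
static long counter_read_fast(const struct counter *c)
{
	return c->global;
}

/* Slow but accurate: folds every per-CPU delta into the result. */
static long counter_read_snapshot(const struct counter *c)
{
	long val = c->global;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		val += c->deltas[cpu];
	return val;
}

static int over_limit(const struct counter *isolated, long limit)
{
	/*
	 * Fast path first; only the rare "looks too high" case pays
	 * for the accurate snapshot, as too_many_isolated() does.
	 */
	if (counter_read_fast(isolated) > limit)
		return counter_read_snapshot(isolated) > limit;
	return 0;
}

int main(void)
{
	/*
	 * Folded value says 10 pages isolated, but 3 decrements still
	 * sit in per-CPU deltas: the true count is 7.
	 */
	struct counter isolated = {
		.global = 10,
		.deltas = { -1, -2, 0, 0 },
	};

	/* Fast read (10) trips the limit of 8; the snapshot (7) clears it. */
	printf("over limit of 8? %d\n", over_limit(&isolated, 8));
	return 0;
}

The fast read can only err by the sum of the unfolded deltas, so the expensive pass runs rarely: exactly when the system really is near the isolation limit.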