aboutsummaryrefslogtreecommitdiff
path: root/mm/migrate.c
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2012-08-03 00:16:45 -0400
committerSteven Rostedt <rostedt@goodmis.org>2012-08-03 00:16:45 -0400
commit660436ca79a9749f3cc66984dae066ced57c9d84 (patch)
treedc51b638651aaa67d172ac334c1bf06ac1ea855e /mm/migrate.c
parent0a085d25fb3ae4f738a50c1e089be936e56e2a3a (diff)
parentf351a1d7efda2edd52c23a150b07b8380c47b6c0 (diff)
Merge tag 'v3.0.39' into v3.0-rt
This is the 3.0.39 stable release Conflicts: kernel/hrtimer.c kernel/time/ntp.c kernel/time/timekeeping.c Required fixup: diff --cc kernel/hrtimer.c index 363965f,957869f..0000000 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@@ -1597,17 -1407,24 +1611,24 @@@ void hrtimer_peek_ahead_timers(void local_irq_restore(flags); } +#else /* CONFIG_HIGH_RES_TIMERS */ + +static inline void __hrtimer_peek_ahead_timers(void) { } + +#endif /* !CONFIG_HIGH_RES_TIMERS */ + static void run_hrtimer_softirq(struct softirq_action *h) { + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + if (cpu_base->clock_was_set) { + cpu_base->clock_was_set = 0; + clock_was_set(); + } + - hrtimer_peek_ahead_timers(); + hrtimer_rt_run_pending(); } -#else /* CONFIG_HIGH_RES_TIMERS */ - -static inline void __hrtimer_peek_ahead_timers(void) { } - -#endif /* !CONFIG_HIGH_RES_TIMERS */ - /* * Called from timer softirq every jiffy, expire hrtimers: * diff --cc kernel/time/ntp.c index 419cbaa,61fc450..0000000 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@@ -389,24 -406,7 +406,6 @@@ int second_overflow(unsigned long secs break; } - write_seqcount_end(&xtime_seq); - raw_spin_unlock(&xtime_lock); -- - return res; - } - - /* - * this routine handles the overflow of the microsecond field - * - * The tricky bits of code to handle the accurate clock support - * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. - * They were originally developed for SUN and DEC kernels. - * All the kudos should go to Dave for this stuff. - */ - void second_overflow(void) - { - s64 delta; - /* Bump the maxerror field */ time_maxerror += MAXFREQ / NSEC_PER_USEC; if (time_maxerror > NTP_PHASE_LIMIT) { diff --cc kernel/time/timekeeping.c index 6ac6438,678ae31..0000000 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@@ -376,14 -395,9 +396,10 @@@ int do_settimeofday(const struct timesp xtime = *tv; - timekeeper.ntp_error = 0; - ntp_clear(); - - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_seqcount_end(&xtime_seq); + raw_spin_unlock_irqrestore(&xtime_lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@@ -415,14 -428,9 +431,10 @@@ int timekeeping_inject_offset(struct ti xtime = timespec_add(xtime, *ts); wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts); - timekeeper.ntp_error = 0; - ntp_clear(); - - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_seqcount_end(&xtime_seq); + raw_spin_unlock_irqrestore(&xtime_lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@@ -596,10 -603,10 +608,11 @@@ void __init timekeeping_init(void } set_normalized_timespec(&wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); + update_rt_offset(); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); + write_seqcount_end(&xtime_seq); + raw_spin_unlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ @@@ -646,13 -658,9 +665,10 @@@ void timekeeping_inject_sleeptime(struc __timekeeping_inject_sleeptime(delta); - timekeeper.ntp_error = 0; - ntp_clear(); - update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, - timekeeper.mult); + timekeeping_update(true); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_seqcount_end(&xtime_seq); + raw_spin_unlock_irqrestore(&xtime_lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@@ -686,8 -693,8 +702,9 @@@ static void timekeeping_resume(void timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; + timekeeping_update(false); - write_sequnlock_irqrestore(&xtime_lock, flags); + write_seqcount_end(&xtime_seq); + raw_spin_unlock_irqrestore(&xtime_lock, flags); touch_softlockup_watchdog(); @@@ -1111,9 -1124,43 +1136,43 @@@ void get_xtime_and_monotonic_and_sleep_ *xtim = xtime; *wtom = wall_to_monotonic; *sleep = total_sleep_time; - } while (read_seqretry(&xtime_lock, seq)); + } while (read_seqcount_retry(&xtime_seq, seq)); } + #ifdef CONFIG_HIGH_RES_TIMERS + /** + * ktime_get_update_offsets - hrtimer helper + * @real: pointer to storage for monotonic -> realtime offset + * @_boot: pointer to storage for monotonic -> boottime offset + * + * Returns current monotonic time and updates the offsets + * Called from hrtimer_interupt() or retrigger_next_event() + */ + ktime_t ktime_get_update_offsets(ktime_t *real, ktime_t *boot) + { + ktime_t now; + unsigned int seq; + u64 secs, nsecs; + + do { - seq = read_seqbegin(&xtime_lock); ++ seq = read_seqcount_begin(&xtime_seq); + + secs = xtime.tv_sec; + nsecs = xtime.tv_nsec; + nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); + + *real = offs_real; + *boot = offs_boot; - } while (read_seqretry(&xtime_lock, seq)); ++ } while (read_seqcount_retry(&xtime_seq, seq)); + + now = ktime_add_ns(ktime_set(secs, 0), nsecs); + now = ktime_sub(now, *real); + return now; + } + #endif + /** * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format */ Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--mm/migrate.c240
1 files changed, 154 insertions, 86 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 14d0a6a632f6..480714b6f3fd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -220,6 +220,56 @@ out:
pte_unmap_unlock(ptep, ptl);
}
+#ifdef CONFIG_BLOCK
+/* Returns true if all buffers are successfully locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head,
+ enum migrate_mode mode)
+{
+ struct buffer_head *bh = head;
+
+ /* Simple case, sync compaction */
+ if (mode != MIGRATE_ASYNC) {
+ do {
+ get_bh(bh);
+ lock_buffer(bh);
+ bh = bh->b_this_page;
+
+ } while (bh != head);
+
+ return true;
+ }
+
+ /* async case, we cannot block on lock_buffer so use trylock_buffer */
+ do {
+ get_bh(bh);
+ if (!trylock_buffer(bh)) {
+ /*
+ * We failed to lock the buffer and cannot stall in
+ * async migration. Release the taken locks
+ */
+ struct buffer_head *failed_bh = bh;
+ put_bh(failed_bh);
+ bh = head;
+ while (bh != failed_bh) {
+ unlock_buffer(bh);
+ put_bh(bh);
+ bh = bh->b_this_page;
+ }
+ return false;
+ }
+
+ bh = bh->b_this_page;
+ } while (bh != head);
+ return true;
+}
+#else
+static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+ enum migrate_mode mode)
+{
+ return true;
+}
+#endif /* CONFIG_BLOCK */
+
/*
* Replace the page in the mapping.
*
@@ -229,7 +279,8 @@ out:
* 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
*/
static int migrate_page_move_mapping(struct address_space *mapping,
- struct page *newpage, struct page *page)
+ struct page *newpage, struct page *page,
+ struct buffer_head *head, enum migrate_mode mode)
{
int expected_count;
void **pslot;
@@ -259,6 +310,20 @@ static int migrate_page_move_mapping(struct address_space *mapping,
}
/*
+ * In the async migration case of moving a page with buffers, lock the
+ * buffers using trylock before the mapping is moved. If the mapping
+ * was moved, we later failed to lock the buffers and could not move
+ * the mapping back due to an elevated page count, we would have to
+ * block waiting on other references to be dropped.
+ */
+ if (mode == MIGRATE_ASYNC && head &&
+ !buffer_migrate_lock_buffers(head, mode)) {
+ page_unfreeze_refs(page, expected_count);
+ spin_unlock_irq(&mapping->tree_lock);
+ return -EAGAIN;
+ }
+
+ /*
* Now we know that no one else is looking at the page.
*/
get_page(newpage); /* add cache reference */
@@ -415,13 +480,14 @@ EXPORT_SYMBOL(fail_migrate_page);
* Pages are locked upon entry and exit.
*/
int migrate_page(struct address_space *mapping,
- struct page *newpage, struct page *page)
+ struct page *newpage, struct page *page,
+ enum migrate_mode mode)
{
int rc;
BUG_ON(PageWriteback(page)); /* Writeback must be complete */
- rc = migrate_page_move_mapping(mapping, newpage, page);
+ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
if (rc)
return rc;
@@ -438,28 +504,28 @@ EXPORT_SYMBOL(migrate_page);
* exist.
*/
int buffer_migrate_page(struct address_space *mapping,
- struct page *newpage, struct page *page)
+ struct page *newpage, struct page *page, enum migrate_mode mode)
{
struct buffer_head *bh, *head;
int rc;
if (!page_has_buffers(page))
- return migrate_page(mapping, newpage, page);
+ return migrate_page(mapping, newpage, page, mode);
head = page_buffers(page);
- rc = migrate_page_move_mapping(mapping, newpage, page);
+ rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
if (rc)
return rc;
- bh = head;
- do {
- get_bh(bh);
- lock_buffer(bh);
- bh = bh->b_this_page;
-
- } while (bh != head);
+ /*
+ * In the async case, migrate_page_move_mapping locked the buffers
+ * with an IRQ-safe spinlock held. In the sync case, the buffers
+ * need to be locked now
+ */
+ if (mode != MIGRATE_ASYNC)
+ BUG_ON(!buffer_migrate_lock_buffers(head, mode));
ClearPagePrivate(page);
set_page_private(newpage, page_private(page));
@@ -536,10 +602,14 @@ static int writeout(struct address_space *mapping, struct page *page)
* Default handling if a filesystem does not provide a migration function.
*/
static int fallback_migrate_page(struct address_space *mapping,
- struct page *newpage, struct page *page)
+ struct page *newpage, struct page *page, enum migrate_mode mode)
{
- if (PageDirty(page))
+ if (PageDirty(page)) {
+ /* Only writeback pages in full synchronous migration */
+ if (mode != MIGRATE_SYNC)
+ return -EBUSY;
return writeout(mapping, page);
+ }
/*
* Buffers may be managed in a filesystem specific way.
@@ -549,7 +619,7 @@ static int fallback_migrate_page(struct address_space *mapping,
!try_to_release_page(page, GFP_KERNEL))
return -EAGAIN;
- return migrate_page(mapping, newpage, page);
+ return migrate_page(mapping, newpage, page, mode);
}
/*
@@ -564,7 +634,7 @@ static int fallback_migrate_page(struct address_space *mapping,
* == 0 - success
*/
static int move_to_new_page(struct page *newpage, struct page *page,
- int remap_swapcache, bool sync)
+ int remap_swapcache, enum migrate_mode mode)
{
struct address_space *mapping;
int rc;
@@ -585,29 +655,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
mapping = page_mapping(page);
if (!mapping)
- rc = migrate_page(mapping, newpage, page);
- else {
+ rc = migrate_page(mapping, newpage, page, mode);
+ else if (mapping->a_ops->migratepage)
/*
- * Do not writeback pages if !sync and migratepage is
- * not pointing to migrate_page() which is nonblocking
- * (swapcache/tmpfs uses migratepage = migrate_page).
+ * Most pages have a mapping and most filesystems provide a
+ * migratepage callback. Anonymous pages are part of swap
+ * space which also has its own migratepage callback. This
+ * is the most common path for page migration.
*/
- if (PageDirty(page) && !sync &&
- mapping->a_ops->migratepage != migrate_page)
- rc = -EBUSY;
- else if (mapping->a_ops->migratepage)
- /*
- * Most pages have a mapping and most filesystems
- * should provide a migration function. Anonymous
- * pages are part of swap space which also has its
- * own migration function. This is the most common
- * path for page migration.
- */
- rc = mapping->a_ops->migratepage(mapping,
- newpage, page);
- else
- rc = fallback_migrate_page(mapping, newpage, page);
- }
+ rc = mapping->a_ops->migratepage(mapping,
+ newpage, page, mode);
+ else
+ rc = fallback_migrate_page(mapping, newpage, page, mode);
if (rc) {
newpage->mapping = NULL;
@@ -621,38 +680,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
return rc;
}
-/*
- * Obtain the lock on page, remove all ptes and migrate the page
- * to the newly allocated page in newpage.
- */
-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
- struct page *page, int force, bool offlining, bool sync)
+static int __unmap_and_move(struct page *page, struct page *newpage,
+ int force, bool offlining, enum migrate_mode mode)
{
- int rc = 0;
- int *result = NULL;
- struct page *newpage = get_new_page(page, private, &result);
+ int rc = -EAGAIN;
int remap_swapcache = 1;
int charge = 0;
struct mem_cgroup *mem;
struct anon_vma *anon_vma = NULL;
- if (!newpage)
- return -ENOMEM;
-
- if (page_count(page) == 1) {
- /* page was freed from under us. So we are done. */
- goto move_newpage;
- }
- if (unlikely(PageTransHuge(page)))
- if (unlikely(split_huge_page(page)))
- goto move_newpage;
-
- /* prepare cgroup just returns 0 or -ENOMEM */
- rc = -EAGAIN;
-
if (!trylock_page(page)) {
- if (!force || !sync)
- goto move_newpage;
+ if (!force || mode == MIGRATE_ASYNC)
+ goto out;
/*
* It's not safe for direct compaction to call lock_page.
@@ -668,7 +707,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
* altogether.
*/
if (current->flags & PF_MEMALLOC)
- goto move_newpage;
+ goto out;
lock_page(page);
}
@@ -697,10 +736,12 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
if (PageWriteback(page)) {
/*
- * For !sync, there is no point retrying as the retry loop
- * is expected to be too short for PageWriteback to be cleared
+ * Only in the case of a full syncronous migration is it
+ * necessary to wait for PageWriteback. In the async case,
+ * the retry loop is too short and in the sync-light case,
+ * the overhead of stalling is too much
*/
- if (!sync) {
+ if (mode != MIGRATE_SYNC) {
rc = -EBUSY;
goto uncharge;
}
@@ -771,7 +812,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
skip_unmap:
if (!page_mapped(page))
- rc = move_to_new_page(newpage, page, remap_swapcache, sync);
+ rc = move_to_new_page(newpage, page, remap_swapcache, mode);
if (rc && remap_swapcache)
remove_migration_ptes(page, page);
@@ -785,27 +826,53 @@ uncharge:
mem_cgroup_end_migration(mem, page, newpage, rc == 0);
unlock:
unlock_page(page);
+out:
+ return rc;
+}
-move_newpage:
+/*
+ * Obtain the lock on page, remove all ptes and migrate the page
+ * to the newly allocated page in newpage.
+ */
+static int unmap_and_move(new_page_t get_new_page, unsigned long private,
+ struct page *page, int force, bool offlining,
+ enum migrate_mode mode)
+{
+ int rc = 0;
+ int *result = NULL;
+ struct page *newpage = get_new_page(page, private, &result);
+
+ if (!newpage)
+ return -ENOMEM;
+
+ if (page_count(page) == 1) {
+ /* page was freed from under us. So we are done. */
+ goto out;
+ }
+
+ if (unlikely(PageTransHuge(page)))
+ if (unlikely(split_huge_page(page)))
+ goto out;
+
+ rc = __unmap_and_move(page, newpage, force, offlining, mode);
+out:
if (rc != -EAGAIN) {
- /*
- * A page that has been migrated has all references
- * removed and will be freed. A page that has not been
- * migrated will have kepts its references and be
- * restored.
- */
- list_del(&page->lru);
+ /*
+ * A page that has been migrated has all references
+ * removed and will be freed. A page that has not been
+ * migrated will have kepts its references and be
+ * restored.
+ */
+ list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
putback_lru_page(page);
}
-
/*
* Move the new page to the LRU. If migration was not successful
* then this will free the page.
*/
putback_lru_page(newpage);
-
if (result) {
if (rc)
*result = rc;
@@ -835,7 +902,8 @@ move_newpage:
*/
static int unmap_and_move_huge_page(new_page_t get_new_page,
unsigned long private, struct page *hpage,
- int force, bool offlining, bool sync)
+ int force, bool offlining,
+ enum migrate_mode mode)
{
int rc = 0;
int *result = NULL;
@@ -848,7 +916,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
rc = -EAGAIN;
if (!trylock_page(hpage)) {
- if (!force || !sync)
+ if (!force || mode != MIGRATE_SYNC)
goto out;
lock_page(hpage);
}
@@ -859,7 +927,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, 1, sync);
+ rc = move_to_new_page(new_hpage, hpage, 1, mode);
if (rc)
remove_migration_ptes(hpage, hpage);
@@ -902,7 +970,7 @@ out:
*/
int migrate_pages(struct list_head *from,
new_page_t get_new_page, unsigned long private, bool offlining,
- bool sync)
+ enum migrate_mode mode)
{
int retry = 1;
int nr_failed = 0;
@@ -923,7 +991,7 @@ int migrate_pages(struct list_head *from,
rc = unmap_and_move(get_new_page, private,
page, pass > 2, offlining,
- sync);
+ mode);
switch(rc) {
case -ENOMEM:
@@ -953,7 +1021,7 @@ out:
int migrate_huge_pages(struct list_head *from,
new_page_t get_new_page, unsigned long private, bool offlining,
- bool sync)
+ enum migrate_mode mode)
{
int retry = 1;
int nr_failed = 0;
@@ -970,7 +1038,7 @@ int migrate_huge_pages(struct list_head *from,
rc = unmap_and_move_huge_page(get_new_page,
private, page, pass > 2, offlining,
- sync);
+ mode);
switch(rc) {
case -ENOMEM:
@@ -1099,7 +1167,7 @@ set_status:
err = 0;
if (!list_empty(&pagelist)) {
err = migrate_pages(&pagelist, new_page_node,
- (unsigned long)pm, 0, true);
+ (unsigned long)pm, 0, MIGRATE_SYNC);
if (err)
putback_lru_pages(&pagelist);
}